Skip to main content

lance_namespace_impls/
dir.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

//! Directory-based Lance Namespace implementation.
//!
//! This module provides a directory-based implementation of the Lance namespace
//! that stores tables as Lance datasets in a filesystem directory structure.

9pub mod manifest;
10
11use arrow::array::Float32Array;
12use arrow::record_batch::RecordBatchIterator;
13use arrow_ipc::reader::StreamReader;
14use async_trait::async_trait;
15use bytes::Bytes;
16use futures::{StreamExt, TryStreamExt};
17use lance::dataset::builder::DatasetBuilder;
18use lance::dataset::scanner::Scanner;
19use lance::dataset::statistics::DatasetStatisticsExt;
20use lance::dataset::transaction::{Operation, Transaction};
21use lance::dataset::{
22    Dataset, MergeInsertBuilder, WhenMatched, WhenNotMatched, WhenNotMatchedBySource, WriteMode,
23    WriteParams,
24};
25use lance::index::{DatasetIndexExt, IndexParams, vector::VectorIndexParams};
26use lance::session::Session;
27use lance_index::scalar::{
28    BuiltinIndexType, FullTextSearchQuery, InvertedIndexParams, ScalarIndexParams,
29};
30use lance_index::vector::{
31    bq::RQBuildParams, hnsw::builder::HnswBuildParams, ivf::IvfBuildParams, pq::PQBuildParams,
32    sq::builder::SQBuildParams,
33};
34use lance_index::{IndexType, is_system_index};
35use lance_io::object_store::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry};
36use lance_linalg::distance::MetricType;
37use lance_table::io::commit::{ManifestNamingScheme, VERSIONS_DIR};
38use object_store::ObjectStoreExt;
39use object_store::path::Path;
40use object_store::{Error as ObjectStoreError, ObjectStore as OSObjectStore, PutMode, PutOptions};
41use std::collections::HashMap;
42use std::io::Cursor;
43use std::sync::{Arc, Mutex};
44
45use crate::context::DynamicContextProvider;
46use lance_namespace::models::{
47    AnalyzeTableQueryPlanRequest, BatchDeleteTableVersionsRequest,
48    BatchDeleteTableVersionsResponse, CountTableRowsRequest, CreateNamespaceRequest,
49    CreateNamespaceResponse, CreateTableIndexRequest, CreateTableIndexResponse, CreateTableRequest,
50    CreateTableResponse, CreateTableScalarIndexResponse, CreateTableVersionRequest,
51    CreateTableVersionResponse, DeclareTableRequest, DeclareTableResponse,
52    DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableIndexStatsRequest,
53    DescribeTableIndexStatsResponse, DescribeTableRequest, DescribeTableResponse,
54    DescribeTableVersionRequest, DescribeTableVersionResponse, DescribeTransactionRequest,
55    DescribeTransactionResponse, DropNamespaceRequest, DropNamespaceResponse,
56    DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, DropTableResponse,
57    ExplainTableQueryPlanRequest, FragmentStats, FragmentSummary, GetTableStatsRequest,
58    GetTableStatsResponse, Identity, IndexContent, InsertIntoTableRequest, InsertIntoTableResponse,
59    ListNamespacesRequest, ListNamespacesResponse, ListTableIndicesRequest,
60    ListTableIndicesResponse, ListTableVersionsRequest, ListTableVersionsResponse,
61    ListTablesRequest, ListTablesResponse, MergeInsertIntoTableRequest,
62    MergeInsertIntoTableResponse, NamespaceExistsRequest, QueryTableRequest,
63    QueryTableRequestColumns, QueryTableRequestVector, RestoreTableRequest, RestoreTableResponse,
64    TableExistsRequest, TableVersion, UpdateTableSchemaMetadataRequest,
65    UpdateTableSchemaMetadataResponse,
66};
67
68use lance_core::{Error, Result};
69use lance_namespace::LanceNamespace;
70use lance_namespace::error::NamespaceError;
71use lance_namespace::schema::arrow_schema_to_json;
72
73use crate::credentials::{
74    CredentialVendor, create_credential_vendor_for_location, has_credential_vendor_config,
75};
76
/// Thread-safe metrics tracker for namespace operations.
///
/// Tracks the count of each API operation when `ops_metrics_enabled` is true.
/// Use `retrieve()` to get a snapshot of all operation counts.
///
/// A panic on another thread while the lock is held poisons the mutex. Every
/// update here is a single map operation, so the map cannot be left
/// half-updated; the tracker therefore recovers the guard from a poisoned lock
/// instead of silently discarding all later updates and snapshots.
#[derive(Debug, Default)]
pub struct OpsMetrics {
    /// Operation name mapped to the number of recorded calls.
    counters: Mutex<HashMap<String, u64>>,
}

impl OpsMetrics {
    /// Lock the counter map, recovering the guard if the mutex was poisoned.
    fn lock_counters(&self) -> std::sync::MutexGuard<'_, HashMap<String, u64>> {
        self.counters
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Increment the counter for an operation.
    ///
    /// The counter is created with a value of 1 on first use. The key string is
    /// only allocated when the operation has not been seen before.
    pub fn increment(&self, operation: &str) {
        let mut counters = self.lock_counters();
        if let Some(count) = counters.get_mut(operation) {
            // Saturate rather than overflow; a u64 counter is not expected to wrap.
            *count = count.saturating_add(1);
        } else {
            counters.insert(operation.to_owned(), 1);
        }
    }

    /// Get a snapshot of all operation counts.
    ///
    /// The returned map is a copy; later increments do not affect it.
    pub fn retrieve(&self) -> HashMap<String, u64> {
        self.lock_counters().clone()
    }

    /// Remove every recorded counter, so the next snapshot is empty.
    pub fn reset(&self) {
        self.lock_counters().clear();
    }
}
106
/// Result of checking table status atomically.
///
/// This struct captures the state of a table directory in a single snapshot,
/// avoiding race conditions between checking existence and other status flags.
///
/// It is a plain set of flags, so it derives the usual value-type traits to
/// make it loggable, copyable and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct TableStatus {
    /// Whether the table directory exists (has any files)
    pub(crate) exists: bool,
    /// Whether the table has a `.lance-deregistered` marker file
    pub(crate) is_deregistered: bool,
    /// Whether the table has a `.lance-reserved` marker file (declared but not written)
    pub(crate) has_reserved_file: bool,
}
119
120enum DirectoryIndexParams {
121    Scalar {
122        index_type: IndexType,
123        params: ScalarIndexParams,
124    },
125    Inverted(InvertedIndexParams),
126    Vector {
127        index_type: IndexType,
128        params: VectorIndexParams,
129    },
130}
131
132impl DirectoryIndexParams {
133    fn index_type(&self) -> IndexType {
134        match self {
135            Self::Scalar { index_type, .. } | Self::Vector { index_type, .. } => *index_type,
136            Self::Inverted(_) => IndexType::Inverted,
137        }
138    }
139
140    fn params(&self) -> &dyn IndexParams {
141        match self {
142            Self::Scalar { params, .. } => params,
143            Self::Inverted(params) => params,
144            Self::Vector { params, .. } => params,
145        }
146    }
147}
148
149/// Builder for creating a DirectoryNamespace.
150///
151/// This builder provides a fluent API for configuring and establishing
152/// connections to directory-based Lance namespaces.
153///
154/// # Examples
155///
156/// ```no_run
157/// # use lance_namespace_impls::DirectoryNamespaceBuilder;
158/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
159/// // Create a local directory namespace
160/// let namespace = DirectoryNamespaceBuilder::new("/path/to/data")
161///     .build()
162///     .await?;
163/// # Ok(())
164/// # }
165/// ```
166///
167/// ```no_run
168/// # use lance_namespace_impls::DirectoryNamespaceBuilder;
169/// # use lance::session::Session;
170/// # use std::sync::Arc;
171/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
172/// // Create with custom storage options and session
173/// let session = Arc::new(Session::default());
174/// let namespace = DirectoryNamespaceBuilder::new("s3://bucket/path")
175///     .storage_option("region", "us-west-2")
176///     .storage_option("access_key_id", "key")
177///     .session(session)
178///     .build()
179///     .await?;
180/// # Ok(())
181/// # }
182/// ```
183#[derive(Clone)]
184pub struct DirectoryNamespaceBuilder {
185    root: String,
186    storage_options: Option<HashMap<String, String>>,
187    session: Option<Arc<Session>>,
188    manifest_enabled: bool,
189    dir_listing_enabled: bool,
190    inline_optimization_enabled: bool,
191    table_version_tracking_enabled: bool,
192    /// When true, table versions are stored in the `__manifest` table instead of
193    /// relying on Lance's native version management.
194    table_version_storage_enabled: bool,
195    /// When true, enables migration mode where the namespace checks the manifest first
196    /// before falling back to directory listing for root-level tables. When false (default),
197    /// root-level tables use directory listing directly without checking the manifest,
198    /// avoiding extra object store calls.
199    dir_listing_to_manifest_migration_enabled: bool,
200    credential_vendor_properties: HashMap<String, String>,
201    context_provider: Option<Arc<dyn DynamicContextProvider>>,
202    commit_retries: Option<u32>,
203    /// When true, returns input storage options in describe_table/declare_table responses
204    /// when no credential vendor is configured. Useful for testing. Default: false.
205    vend_input_storage_options: bool,
206    /// When set, adds expires_at_millis to vended storage options. The value is calculated
207    /// as current_time_millis + this interval. This allows clients to know when to refresh
208    /// credentials by calling describe_table again. Only effective when vend_input_storage_options
209    /// is true.
210    vend_input_storage_options_refresh_interval_millis: Option<u64>,
211    /// When true, tracks operation metrics. Default: false.
212    ops_metrics_enabled: bool,
213}
214
215impl std::fmt::Debug for DirectoryNamespaceBuilder {
216    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
217        f.debug_struct("DirectoryNamespaceBuilder")
218            .field("root", &self.root)
219            .field("storage_options", &self.storage_options)
220            .field("manifest_enabled", &self.manifest_enabled)
221            .field("dir_listing_enabled", &self.dir_listing_enabled)
222            .field(
223                "inline_optimization_enabled",
224                &self.inline_optimization_enabled,
225            )
226            .field(
227                "table_version_tracking_enabled",
228                &self.table_version_tracking_enabled,
229            )
230            .field(
231                "table_version_storage_enabled",
232                &self.table_version_storage_enabled,
233            )
234            .field(
235                "dir_listing_to_manifest_migration_enabled",
236                &self.dir_listing_to_manifest_migration_enabled,
237            )
238            .field(
239                "context_provider",
240                &self.context_provider.as_ref().map(|_| "Some(...)"),
241            )
242            .field(
243                "vend_input_storage_options",
244                &self.vend_input_storage_options,
245            )
246            .field(
247                "vend_input_storage_options_refresh_interval_millis",
248                &self.vend_input_storage_options_refresh_interval_millis,
249            )
250            .field("ops_metrics_enabled", &self.ops_metrics_enabled)
251            .finish()
252    }
253}
254
255impl DirectoryNamespaceBuilder {
256    /// Create a new DirectoryNamespaceBuilder with the specified root path.
257    ///
258    /// # Arguments
259    ///
260    /// * `root` - Root directory path (local path or cloud URI like s3://bucket/path)
261    pub fn new(root: impl Into<String>) -> Self {
262        Self {
263            root: root.into().trim_end_matches('/').to_string(),
264            storage_options: None,
265            session: None,
266            manifest_enabled: true,
267            dir_listing_enabled: true, // Default to enabled for backwards compatibility
268            inline_optimization_enabled: true,
269            table_version_tracking_enabled: false, // Default to disabled
270            table_version_storage_enabled: false,  // Default to disabled
271            dir_listing_to_manifest_migration_enabled: false, // Default to disabled
272            credential_vendor_properties: HashMap::new(),
273            context_provider: None,
274            commit_retries: None,
275            vend_input_storage_options: false,
276            vend_input_storage_options_refresh_interval_millis: None,
277            ops_metrics_enabled: false,
278        }
279    }
280
281    /// Enable or disable manifest-based listing.
282    ///
283    /// When enabled (default), the namespace uses a `__manifest` table to track tables.
284    /// When disabled, relies solely on directory scanning.
285    pub fn manifest_enabled(mut self, enabled: bool) -> Self {
286        self.manifest_enabled = enabled;
287        self
288    }
289
290    /// Enable or disable directory-based listing fallback.
291    ///
292    /// When enabled (default), falls back to directory scanning for tables not in the manifest.
293    /// When disabled, only consults the manifest table.
294    pub fn dir_listing_enabled(mut self, enabled: bool) -> Self {
295        self.dir_listing_enabled = enabled;
296        self
297    }
298
299    /// Enable or disable migration mode from directory listing to manifest.
300    ///
301    /// When enabled, root-level table operations check the manifest first before
302    /// falling back to directory listing. When disabled (default), root-level tables
303    /// use directory listing directly, avoiding extra object store calls.
304    /// Only relevant when both `manifest_enabled` and `dir_listing_enabled` are true.
305    pub fn dir_listing_to_manifest_migration_enabled(mut self, enabled: bool) -> Self {
306        self.dir_listing_to_manifest_migration_enabled = enabled;
307        self
308    }
309
310    /// Enable or disable inline optimization of the __manifest table.
311    ///
312    /// When enabled (default), performs compaction and indexing on the __manifest table
313    /// after every write operation to maintain optimal performance.
314    /// When disabled, manual optimization must be performed separately.
315    pub fn inline_optimization_enabled(mut self, enabled: bool) -> Self {
316        self.inline_optimization_enabled = enabled;
317        self
318    }
319
320    /// Enable or disable table version tracking through the namespace.
321    ///
322    /// When enabled, `describe_table` returns `managed_versioning: true` to indicate
323    /// that commits should go through the namespace's table version APIs rather than
324    /// direct object store operations.
325    ///
326    /// When disabled (default), `managed_versioning` is not set.
327    pub fn table_version_tracking_enabled(mut self, enabled: bool) -> Self {
328        self.table_version_tracking_enabled = enabled;
329        self
330    }
331
332    /// Enable or disable table version management through the `__manifest` table.
333    ///
334    /// When enabled, table versions are tracked as `table_version` entries in the
335    /// `__manifest` Lance table. This enables:
336    /// - Centralized version tracking instead of per-table `_versions/` directories
337    ///
338    /// Requires `manifest_enabled` to be true.
339    /// When disabled (default), version storage uses per-table storage operations.
340    pub fn table_version_storage_enabled(mut self, enabled: bool) -> Self {
341        self.table_version_storage_enabled = enabled;
342        self
343    }
344
345    /// Create a DirectoryNamespaceBuilder from properties HashMap.
346    ///
347    /// This method parses a properties map into builder configuration.
348    /// It expects:
349    /// - `root`: The root directory path (required)
350    /// - `manifest_enabled`: Enable manifest-based table tracking (optional, default: true)
351    /// - `dir_listing_enabled`: Enable directory listing for table discovery (optional, default: true)
352    /// - `inline_optimization_enabled`: Enable inline optimization of __manifest table (optional, default: true)
353    /// - `storage.*`: Storage options (optional, prefix will be stripped)
354    ///
355    /// Credential vendor properties (prefixed with `credential_vendor.`, prefix is stripped):
356    /// - `credential_vendor.enabled`: Set to "true" to enable credential vending (required)
357    /// - `credential_vendor.permission`: Permission level: read, write, or admin (default: read)
358    ///
359    /// AWS-specific properties (for s3:// locations):
360    /// - `credential_vendor.aws_role_arn`: AWS IAM role ARN (required for AWS)
361    /// - `credential_vendor.aws_external_id`: AWS external ID (optional)
362    /// - `credential_vendor.aws_region`: AWS region (optional)
363    /// - `credential_vendor.aws_role_session_name`: AWS role session name (optional)
364    /// - `credential_vendor.aws_duration_millis`: Credential duration in ms (default: 3600000, range: 15min-12hrs)
365    ///
366    /// GCP-specific properties (for gs:// locations):
367    /// - `credential_vendor.gcp_service_account`: Service account to impersonate (optional)
368    /// - `credential_vendor.gcp_workload_identity_provider`: Workload Identity Provider for OIDC token exchange (optional)
369    /// - `credential_vendor.gcp_impersonation_service_account`: Service account to impersonate after workload identity exchange (optional)
370    ///
371    /// Note: GCP uses Application Default Credentials (ADC). To use a service account key file,
372    /// set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable before starting.
373    /// GCP token duration cannot be configured; it's determined by the STS endpoint (typically 1 hour).
374    ///
375    /// Azure-specific properties (for az:// locations):
376    /// - `credential_vendor.azure_account_name`: Azure storage account name (required for Azure)
377    /// - `credential_vendor.azure_tenant_id`: Azure tenant ID (optional)
378    /// - `credential_vendor.azure_federated_client_id`: Client ID used for workload identity federation (optional)
379    /// - `credential_vendor.azure_duration_millis`: Credential duration in ms (default: 3600000, up to 7 days)
380    ///
381    /// # Arguments
382    ///
383    /// * `properties` - Configuration properties
384    /// * `session` - Optional Lance session to reuse object store registry
385    ///
386    /// # Returns
387    ///
388    /// Returns a `DirectoryNamespaceBuilder` instance.
389    ///
390    /// # Errors
391    ///
392    /// Returns an error if the `root` property is missing.
393    ///
394    /// # Examples
395    ///
396    /// ```no_run
397    /// # use lance_namespace_impls::DirectoryNamespaceBuilder;
398    /// # use std::collections::HashMap;
399    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
400    /// let mut properties = HashMap::new();
401    /// properties.insert("root".to_string(), "/path/to/data".to_string());
402    /// properties.insert("manifest_enabled".to_string(), "true".to_string());
403    /// properties.insert("dir_listing_enabled".to_string(), "false".to_string());
404    /// properties.insert("storage.region".to_string(), "us-west-2".to_string());
405    ///
406    /// let namespace = DirectoryNamespaceBuilder::from_properties(properties, None)?
407    ///     .build()
408    ///     .await?;
409    /// # Ok(())
410    /// # }
411    /// ```
412    pub fn from_properties(
413        properties: HashMap<String, String>,
414        session: Option<Arc<Session>>,
415    ) -> Result<Self> {
416        // Extract root from properties (required)
417        let root = properties.get("root").cloned().ok_or_else(|| {
418            lance_core::Error::from(NamespaceError::InvalidInput {
419                message: "Missing required property 'root' for directory namespace".to_string(),
420            })
421        })?;
422
423        // Extract storage options (properties prefixed with "storage.")
424        let storage_options: HashMap<String, String> = properties
425            .iter()
426            .filter_map(|(k, v)| {
427                k.strip_prefix("storage.")
428                    .map(|key| (key.to_string(), v.clone()))
429            })
430            .collect();
431
432        let storage_options = if storage_options.is_empty() {
433            None
434        } else {
435            Some(storage_options)
436        };
437
438        // Extract manifest_enabled (default: true)
439        let manifest_enabled = properties
440            .get("manifest_enabled")
441            .and_then(|v| v.parse::<bool>().ok())
442            .unwrap_or(true);
443
444        // Extract dir_listing_enabled (default: true)
445        let dir_listing_enabled = properties
446            .get("dir_listing_enabled")
447            .and_then(|v| v.parse::<bool>().ok())
448            .unwrap_or(true);
449
450        // Extract inline_optimization_enabled (default: true)
451        let inline_optimization_enabled = properties
452            .get("inline_optimization_enabled")
453            .and_then(|v| v.parse::<bool>().ok())
454            .unwrap_or(true);
455
456        // Extract table_version_tracking_enabled (default: false)
457        let table_version_tracking_enabled = properties
458            .get("table_version_tracking_enabled")
459            .and_then(|v| v.parse::<bool>().ok())
460            .unwrap_or(false);
461
462        // Extract table_version_storage_enabled (default: false)
463        let table_version_storage_enabled = properties
464            .get("table_version_storage_enabled")
465            .and_then(|v| v.parse::<bool>().ok())
466            .unwrap_or(false);
467
468        // Extract dir_listing_to_manifest_migration_enabled (default: false)
469        let dir_listing_to_manifest_migration_enabled = properties
470            .get("dir_listing_to_manifest_migration_enabled")
471            .and_then(|v| v.parse::<bool>().ok())
472            .unwrap_or(false);
473
474        // Extract credential vendor properties (properties prefixed with "credential_vendor.")
475        // The prefix is stripped to get short property names
476        // The build() method will check if enabled=true before creating the vendor
477        let credential_vendor_properties: HashMap<String, String> = properties
478            .iter()
479            .filter_map(|(k, v)| {
480                k.strip_prefix("credential_vendor.")
481                    .map(|key| (key.to_string(), v.clone()))
482            })
483            .collect();
484
485        let commit_retries = properties
486            .get("commit_retries")
487            .and_then(|v| v.parse::<u32>().ok());
488
489        // Extract vend_input_storage_options (default: false)
490        let vend_input_storage_options = properties
491            .get("vend_input_storage_options")
492            .and_then(|v| v.parse::<bool>().ok())
493            .unwrap_or(false);
494
495        // Extract vend_input_storage_options_refresh_interval_millis (optional)
496        let vend_input_storage_options_refresh_interval_millis = properties
497            .get("vend_input_storage_options_refresh_interval_millis")
498            .and_then(|v| v.parse::<u64>().ok());
499
500        // Extract ops_metrics_enabled (default: false)
501        let ops_metrics_enabled = properties
502            .get("ops_metrics_enabled")
503            .and_then(|v| v.parse::<bool>().ok())
504            .unwrap_or(false);
505
506        Ok(Self {
507            root: root.trim_end_matches('/').to_string(),
508            storage_options,
509            session,
510            manifest_enabled,
511            dir_listing_enabled,
512            inline_optimization_enabled,
513            table_version_tracking_enabled,
514            table_version_storage_enabled,
515            dir_listing_to_manifest_migration_enabled,
516            credential_vendor_properties,
517            context_provider: None,
518            commit_retries,
519            vend_input_storage_options,
520            vend_input_storage_options_refresh_interval_millis,
521            ops_metrics_enabled,
522        })
523    }
524
525    /// Add a storage option.
526    ///
527    /// # Arguments
528    ///
529    /// * `key` - Storage option key (e.g., "region", "access_key_id")
530    /// * `value` - Storage option value
531    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
532        self.storage_options
533            .get_or_insert_with(HashMap::new)
534            .insert(key.into(), value.into());
535        self
536    }
537
538    /// Add multiple storage options.
539    ///
540    /// # Arguments
541    ///
542    /// * `options` - HashMap of storage options to add
543    pub fn storage_options(mut self, options: HashMap<String, String>) -> Self {
544        self.storage_options
545            .get_or_insert_with(HashMap::new)
546            .extend(options);
547        self
548    }
549
550    /// Set the Lance session to use for this namespace.
551    ///
552    /// When a session is provided, the namespace will reuse the session's
553    /// object store registry, allowing multiple namespaces and datasets
554    /// to share the same underlying storage connections.
555    ///
556    /// # Arguments
557    ///
558    /// * `session` - Arc-wrapped Lance session
559    pub fn session(mut self, session: Arc<Session>) -> Self {
560        self.session = Some(session);
561        self
562    }
563
564    /// Set the number of retries for commit operations on the manifest table.
565    /// If not set, defaults to [`lance_table::io::commit::CommitConfig`] default (20).
566    pub fn commit_retries(mut self, retries: u32) -> Self {
567        self.commit_retries = Some(retries);
568        self
569    }
570
571    /// Add a credential vendor property.
572    ///
573    /// Use short property names without the `credential_vendor.` prefix.
574    /// Common properties: `enabled`, `permission`.
575    /// AWS properties: `aws_role_arn`, `aws_external_id`, `aws_region`, `aws_role_session_name`, `aws_duration_millis`.
576    /// GCP properties: `gcp_service_account`, `gcp_workload_identity_provider`, `gcp_impersonation_service_account`.
577    /// Azure properties: `azure_account_name`, `azure_tenant_id`, `azure_federated_client_id`, `azure_duration_millis`.
578    ///
579    /// # Arguments
580    ///
581    /// * `key` - Property key (e.g., "enabled", "aws_role_arn")
582    /// * `value` - Property value
583    ///
584    /// # Example
585    ///
586    /// ```no_run
587    /// # use lance_namespace_impls::DirectoryNamespaceBuilder;
588    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
589    /// let namespace = DirectoryNamespaceBuilder::new("s3://my-bucket/data")
590    ///     .credential_vendor_property("enabled", "true")
591    ///     .credential_vendor_property("aws_role_arn", "arn:aws:iam::123456789012:role/MyRole")
592    ///     .credential_vendor_property("permission", "read")
593    ///     .build()
594    ///     .await?;
595    /// # Ok(())
596    /// # }
597    /// ```
598    pub fn credential_vendor_property(
599        mut self,
600        key: impl Into<String>,
601        value: impl Into<String>,
602    ) -> Self {
603        self.credential_vendor_properties
604            .insert(key.into(), value.into());
605        self
606    }
607
608    /// Add multiple credential vendor properties.
609    ///
610    /// Use short property names without the `credential_vendor.` prefix.
611    ///
612    /// # Arguments
613    ///
614    /// * `properties` - HashMap of credential vendor properties to add
615    pub fn credential_vendor_properties(mut self, properties: HashMap<String, String>) -> Self {
616        self.credential_vendor_properties.extend(properties);
617        self
618    }
619
620    /// Set a dynamic context provider for per-request context.
621    ///
622    /// The provider can be used to generate additional context for operations.
623    /// For DirectoryNamespace, the context is stored but not directly used
624    /// in operations (unlike RestNamespace where it's converted to HTTP headers).
625    ///
626    /// # Arguments
627    ///
628    /// * `provider` - The context provider implementation
629    pub fn context_provider(mut self, provider: Arc<dyn DynamicContextProvider>) -> Self {
630        self.context_provider = Some(provider);
631        self
632    }
633
634    /// Enable or disable returning input storage options in responses.
635    ///
636    /// When enabled, `describe_table` and `declare_table` will return the storage
637    /// options passed to the builder when no credential vendor is configured.
638    /// This is useful for testing scenarios where you want to pass storage options
639    /// through to clients.
640    ///
641    /// Default is false (storage options are not returned unless credential vending is configured).
642    pub fn vend_input_storage_options(mut self, enabled: bool) -> Self {
643        self.vend_input_storage_options = enabled;
644        self
645    }
646
647    /// Set the refresh interval for vended input storage options.
648    ///
649    /// When set, vended storage options will include an `expires_at_millis` field
650    /// calculated as `current_time_millis + interval_millis`. This allows clients
651    /// to know when to refresh credentials by calling `describe_table` again.
652    ///
653    /// This only has effect when `vend_input_storage_options` is enabled.
654    ///
655    /// # Arguments
656    ///
657    /// * `interval_millis` - The refresh interval in milliseconds
658    pub fn vend_input_storage_options_refresh_interval_millis(
659        mut self,
660        interval_millis: u64,
661    ) -> Self {
662        self.vend_input_storage_options_refresh_interval_millis = Some(interval_millis);
663        self
664    }
665
666    /// Enable or disable operation metrics tracking.
667    ///
668    /// When enabled, the namespace will track how many times each API operation
669    /// is called. Use `retrieve_ops_metrics()` on the built namespace to get
670    /// the current counts.
671    ///
672    /// Default is false.
673    pub fn ops_metrics_enabled(mut self, enabled: bool) -> Self {
674        self.ops_metrics_enabled = enabled;
675        self
676    }
677
678    /// Build the DirectoryNamespace.
679    ///
680    /// # Returns
681    ///
682    /// Returns a `DirectoryNamespace` instance.
683    ///
684    /// # Errors
685    ///
686    /// Returns an error if:
687    /// - The root path is invalid
688    /// - Connection to the storage backend fails
689    /// - Storage options are invalid
690    pub async fn build(self) -> Result<DirectoryNamespace> {
691        // Validate: table_version_storage_enabled requires manifest_enabled
692        if self.table_version_storage_enabled && !self.manifest_enabled {
693            return Err(NamespaceError::InvalidInput {
694                message: "table_version_storage_enabled requires manifest_enabled=true".to_string(),
695            }
696            .into());
697        }
698
699        let (object_store, base_path) =
700            Self::initialize_object_store(&self.root, &self.storage_options, &self.session).await?;
701
702        let manifest_ns = if self.manifest_enabled {
703            match manifest::ManifestNamespace::from_directory(
704                self.root.clone(),
705                self.storage_options.clone(),
706                self.session.clone(),
707                object_store.clone(),
708                base_path.clone(),
709                self.dir_listing_enabled,
710                self.inline_optimization_enabled,
711                self.commit_retries,
712                self.table_version_storage_enabled,
713            )
714            .await
715            {
716                Ok(ns) => Some(Arc::new(ns)),
717                Err(e) => {
718                    // Failed to initialize manifest namespace, fall back to directory listing only
719                    log::warn!(
720                        "Failed to initialize manifest namespace, falling back to directory listing only: {}",
721                        e
722                    );
723                    None
724                }
725            }
726        } else {
727            None
728        };
729
730        // Create credential vendor once during initialization if enabled
731        let credential_vendor = if has_credential_vendor_config(&self.credential_vendor_properties)
732        {
733            create_credential_vendor_for_location(&self.root, &self.credential_vendor_properties)
734                .await?
735                .map(Arc::from)
736        } else {
737            None
738        };
739
740        let ops_metrics = if self.ops_metrics_enabled {
741            Some(Arc::new(OpsMetrics::default()))
742        } else {
743            None
744        };
745
746        Ok(DirectoryNamespace {
747            root: self.root,
748            storage_options: self.storage_options,
749            session: self.session,
750            object_store,
751            base_path,
752            manifest_ns,
753            dir_listing_enabled: self.dir_listing_enabled,
754            dir_listing_to_manifest_migration_enabled: self
755                .dir_listing_to_manifest_migration_enabled,
756            table_version_tracking_enabled: self.table_version_tracking_enabled,
757            table_version_storage_enabled: self.table_version_storage_enabled,
758            credential_vendor,
759            context_provider: self.context_provider,
760            vend_input_storage_options: self.vend_input_storage_options,
761            vend_input_storage_options_refresh_interval_millis: self
762                .vend_input_storage_options_refresh_interval_millis,
763            ops_metrics,
764        })
765    }
766
767    /// Initialize the Lance ObjectStore based on the configuration
768    async fn initialize_object_store(
769        root: &str,
770        storage_options: &Option<HashMap<String, String>>,
771        session: &Option<Arc<Session>>,
772    ) -> Result<(Arc<ObjectStore>, Path)> {
773        // Build ObjectStoreParams from storage options
774        let accessor = storage_options.clone().map(|opts| {
775            Arc::new(lance_io::object_store::StorageOptionsAccessor::with_static_options(opts))
776        });
777        let params = ObjectStoreParams {
778            storage_options_accessor: accessor,
779            ..Default::default()
780        };
781
782        // Use object store registry from session if provided, otherwise create a new one
783        let registry = if let Some(session) = session {
784            session.store_registry()
785        } else {
786            Arc::new(ObjectStoreRegistry::default())
787        };
788
789        // Use Lance's object store factory to create from URI
790        let (object_store, base_path) = ObjectStore::from_uri_and_params(registry, root, &params)
791            .await
792            .map_err(|e| {
793                lance_core::Error::from(NamespaceError::Internal {
794                    message: format!("Failed to create object store: {:?}", e),
795                })
796            })?;
797
798        Ok((object_store, base_path))
799    }
800}
801
/// Directory-based implementation of Lance Namespace.
///
/// This implementation stores tables as Lance datasets in a directory structure.
/// It supports local filesystems and cloud storage backends through Lance's object store.
///
/// ## Manifest-based Listing
///
/// When `manifest_enabled=true`, the namespace uses a special `__manifest` Lance table to track tables
/// instead of scanning the filesystem. This provides:
/// - Better performance for listing operations
/// - Ability to track table metadata
/// - Foundation for future features like namespaces and table renaming
///
/// When `dir_listing_enabled=true`, the namespace falls back to directory scanning for tables not
/// found in the manifest, enabling gradual migration.
///
/// ## Credential Vending
///
/// When credential vendor properties are configured, `describe_table` will vend temporary
/// credentials based on the table location URI. The vendor type is auto-selected:
/// - `s3://` locations use AWS STS AssumeRole
/// - `gs://` locations use GCP OAuth2 tokens
/// - `az://` locations use Azure SAS tokens
pub struct DirectoryNamespace {
    /// Root location of the namespace, as given to the builder. The object store is
    /// created from this URI.
    root: String,
    /// Namespace-level storage options. Passed to the object store at build time and
    /// reused when datasets are opened or written through this namespace.
    storage_options: Option<HashMap<String, String>>,
    /// Optional Lance session. When present, its object store registry is reused and it is
    /// attached to dataset loads and writes.
    session: Option<Arc<Session>>,
    /// Object store created for `root` during `build`.
    object_store: Arc<ObjectStore>,
    /// Path returned alongside `object_store` for `root`; directory listing reads this path.
    base_path: Path,
    /// Manifest-backed namespace. `None` when manifest mode is disabled, or when its
    /// initialization failed during `build` (which is logged, not propagated).
    manifest_ns: Option<Arc<manifest::ManifestNamespace>>,
    /// When true, root-level (single-element) table IDs may be resolved by directory
    /// scanning, including as a fallback after a manifest lookup error.
    dir_listing_enabled: bool,
    /// When true, root-level table operations check the manifest first before
    /// falling back to directory listing. When false, root-level tables skip
    /// the manifest check and use directory listing directly.
    dir_listing_to_manifest_migration_enabled: bool,
    /// When true, `describe_table` returns `managed_versioning: true` to indicate
    /// commits should go through namespace table version APIs.
    table_version_tracking_enabled: bool,
    /// When true, table versions are stored in the `__manifest` table.
    table_version_storage_enabled: bool,
    /// Credential vendor created once during initialization.
    /// Used to vend temporary credentials for table access.
    credential_vendor: Option<Arc<dyn CredentialVendor>>,
    /// Dynamic context provider for per-request context.
    /// Stored but not directly used in operations (available for future extensions).
    #[allow(dead_code)]
    context_provider: Option<Arc<dyn DynamicContextProvider>>,
    /// When true, returns input storage options in responses when no credential vendor is configured.
    vend_input_storage_options: bool,
    /// Refresh interval in milliseconds for vended input storage options.
    /// When set, expires_at_millis is added to storage options.
    vend_input_storage_options_refresh_interval_millis: Option<u64>,
    /// Operation metrics tracker, created when ops_metrics_enabled is true.
    ops_metrics: Option<Arc<OpsMetrics>>,
}
857
858impl std::fmt::Debug for DirectoryNamespace {
859    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
860        write!(f, "{}", self.namespace_id())
861    }
862}
863
864impl std::fmt::Display for DirectoryNamespace {
865    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
866        write!(f, "{}", self.namespace_id())
867    }
868}
869
/// Describes the version ranges to delete for a single table.
/// Used by `batch_delete_table_versions` and `delete_physical_version_files`.
struct TableDeleteEntry {
    /// Identifier of the table the ranges apply to, in the same `Option<Vec<String>>` form
    /// used for request IDs elsewhere in this file.
    table_id: Option<Vec<String>>,
    /// Version ranges to delete, one `(i64, i64)` pair per range.
    /// NOTE(review): presumably `(start, end)`; whether the end bound is inclusive is not
    /// visible from this definition — confirm against the code that consumes it.
    ranges: Vec<(i64, i64)>,
}
876
877impl DirectoryNamespace {
878    /// Apply pagination to a list of table names
879    ///
880    /// Sorts the list alphabetically and applies pagination using page_token (start_after) and limit.
881    ///
882    /// # Arguments
883    /// * `names` - The vector of table names to paginate
884    /// * `page_token` - Skip items until finding one greater than this value (start_after semantics)
885    /// * `limit` - Maximum number of items to keep
886    ///
887    /// # Returns
888    /// The next page token (last item in this page) if more results exist beyond the limit,
889    /// or `None` if this is the last page.
890    fn apply_pagination(
891        names: &mut Vec<String>,
892        page_token: Option<String>,
893        limit: Option<i32>,
894    ) -> Option<String> {
895        // Sort alphabetically for consistent ordering
896        names.sort();
897
898        // Apply page_token filtering (start_after semantics)
899        if let Some(start_after) = page_token {
900            if let Some(index) = names
901                .iter()
902                .position(|name| name.as_str() > start_after.as_str())
903            {
904                names.drain(0..index);
905            } else {
906                names.clear();
907            }
908        }
909
910        // Apply limit and compute next page token
911        if let Some(limit) = limit
912            && limit >= 0
913        {
914            let limit = limit as usize;
915            if names.len() > limit {
916                let next_page_token = if limit > 0 {
917                    Some(names[limit - 1].clone())
918                } else {
919                    None
920                };
921                names.truncate(limit);
922                return next_page_token;
923            }
924        }
925
926        None
927    }
928
929    /// List tables using directory scanning (fallback method)
930    async fn list_directory_tables(&self) -> Result<Vec<String>> {
931        let mut tables = Vec::new();
932        let entries = self
933            .object_store
934            .read_dir(self.base_path.clone())
935            .await
936            .map_err(|e| {
937                lance_core::Error::from(NamespaceError::Internal {
938                    message: format!("Failed to list directory: {:?}", e),
939                })
940            })?;
941
942        for entry in entries {
943            let path = entry.trim_end_matches('/');
944            if !path.ends_with(".lance") {
945                continue;
946            }
947
948            let table_name = &path[..path.len() - 6];
949
950            // Use atomic check to skip deregistered tables.
951            let status = self.check_table_status(table_name).await;
952            if status.is_deregistered {
953                continue;
954            }
955
956            tables.push(table_name.to_string());
957        }
958
959        Ok(tables)
960    }
961
962    /// Validate that the namespace ID represents the root namespace
963    fn validate_root_namespace_id(id: &Option<Vec<String>>) -> Result<()> {
964        if let Some(id) = id
965            && !id.is_empty()
966        {
967            return Err(NamespaceError::Unsupported {
968                message: format!(
969                    "Directory namespace only supports root namespace operations, but got namespace ID: {:?}. Expected empty ID.",
970                    id
971                ),
972            }
973            .into());
974        }
975        Ok(())
976    }
977
978    /// Extract table name from table ID
979    fn table_name_from_id(id: &Option<Vec<String>>) -> Result<String> {
980        let id = id.as_ref().ok_or_else(|| {
981            lance_core::Error::from(NamespaceError::InvalidInput {
982                message: "Directory namespace table ID cannot be empty".to_string(),
983            })
984        })?;
985
986        if id.len() != 1 {
987            return Err(NamespaceError::Unsupported {
988                message: format!(
989                    "Multi-level table IDs are only supported when manifest mode is enabled, but got: {:?}",
990                    id
991                ),
992            }
993            .into());
994        }
995
996        Ok(id[0].clone())
997    }
998
999    fn format_table_id(table_id: &[String]) -> String {
1000        format!(
1001            "table id '{}'",
1002            manifest::ManifestNamespace::str_object_id(table_id)
1003        )
1004    }
1005
1006    fn format_table_id_from_request(id: &Option<Vec<String>>) -> String {
1007        id.as_ref()
1008            .map(|table_id| Self::format_table_id(table_id))
1009            .unwrap_or_else(|| "table id '<unknown>'".to_string())
1010    }
1011
1012    async fn resolve_table_location(&self, id: &Option<Vec<String>>) -> Result<String> {
1013        let mut describe_req = DescribeTableRequest::new();
1014        describe_req.id = id.clone();
1015        describe_req.load_detailed_metadata = Some(false);
1016
1017        // Use internal impl to avoid counting this as an external API call
1018        let describe_resp = self.describe_table_impl(describe_req).await?;
1019
1020        describe_resp.location.ok_or_else(|| {
1021            lance_core::Error::from(NamespaceError::TableNotFound {
1022                message: format!("Table location not found for: {:?}", id),
1023            })
1024        })
1025    }
1026
1027    async fn table_has_actual_manifests(&self, table_name: &str) -> Result<bool> {
1028        manifest::ManifestNamespace::path_has_actual_manifests(
1029            &self.object_store,
1030            &self.table_path(table_name),
1031        )
1032        .await
1033    }
1034
1035    async fn filter_declared_tables(
1036        &self,
1037        tables: Vec<String>,
1038        include_declared: bool,
1039    ) -> Result<Vec<String>> {
1040        if include_declared {
1041            return Ok(tables);
1042        }
1043
1044        let mut stream = futures::stream::iter(tables.into_iter().map(|table_name| async move {
1045            // `include_declared=false` is an explicit opt-in. We still pay one `_versions/` probe
1046            // per table here so declared-state is derived from actual manifests. This is linear in
1047            // the total number of listed tables, but we probe a bounded number concurrently.
1048            if self.table_has_actual_manifests(&table_name).await? {
1049                Ok::<Option<String>, Error>(Some(table_name))
1050            } else {
1051                Ok::<Option<String>, Error>(None)
1052            }
1053        }))
1054        .buffered(manifest::DECLARED_FILTER_CONCURRENCY);
1055
1056        let mut filtered = Vec::new();
1057        while let Some(result) = stream.next().await {
1058            if let Some(table_name) = result? {
1059                filtered.push(table_name);
1060            }
1061        }
1062        Ok(filtered)
1063    }
1064
1065    fn ipc_reader_from_request_data(
1066        request_data: &Bytes,
1067        operation: &str,
1068    ) -> Result<(
1069        Box<dyn arrow::record_batch::RecordBatchReader + Send>,
1070        usize,
1071    )> {
1072        if request_data.is_empty() {
1073            return Err(NamespaceError::InvalidInput {
1074                message: format!(
1075                    "Request data (Arrow IPC stream) is required for {}",
1076                    operation
1077                ),
1078            }
1079            .into());
1080        }
1081
1082        let cursor = Cursor::new(request_data.as_ref());
1083        let stream_reader =
1084            StreamReader::try_new(cursor, None).map_err(|e| NamespaceError::InvalidInput {
1085                message: format!("Invalid Arrow IPC stream: {}", e),
1086            })?;
1087        let arrow_schema = stream_reader.schema();
1088
1089        let mut num_rows = 0usize;
1090        let mut batches = Vec::new();
1091        for batch_result in stream_reader {
1092            let batch = batch_result.map_err(|e| NamespaceError::Internal {
1093                message: format!("Failed to read batch from IPC stream: {}", e),
1094            })?;
1095            num_rows += batch.num_rows();
1096            batches.push(batch);
1097        }
1098
1099        let reader: Box<dyn arrow::record_batch::RecordBatchReader + Send> = if batches.is_empty() {
1100            let batch = arrow::record_batch::RecordBatch::new_empty(arrow_schema.clone());
1101            Box::new(RecordBatchIterator::new(vec![Ok(batch)], arrow_schema))
1102        } else {
1103            let batch_results: Vec<_> = batches.into_iter().map(Ok).collect();
1104            Box::new(RecordBatchIterator::new(batch_results, arrow_schema))
1105        };
1106
1107        Ok((reader, num_rows))
1108    }
1109
1110    async fn table_uri_has_actual_manifests(&self, table_uri: &str) -> Result<bool> {
1111        let table_path = self.object_store_path_from_uri(table_uri)?;
1112        manifest::ManifestNamespace::path_has_actual_manifests(&self.object_store, &table_path)
1113            .await
1114    }
1115
1116    fn object_store_path_from_uri(&self, uri: &str) -> Result<Path> {
1117        let registry = self
1118            .session
1119            .as_ref()
1120            .map(|session| session.store_registry())
1121            .unwrap_or_else(|| Arc::new(ObjectStoreRegistry::default()));
1122        ObjectStore::extract_path_from_uri(registry, uri)
1123    }
1124
1125    fn validate_dir_only_properties(
1126        properties: Option<&HashMap<String, String>>,
1127        operation: &str,
1128    ) -> Result<()> {
1129        // Dir-only mode has no metadata catalog, so non-empty table properties would be accepted
1130        // and then lost. Reject them instead. Request-level storage options are different: they
1131        // directly affect the current write and remain supported in dir-only mode.
1132        if properties.is_some_and(|properties| !properties.is_empty()) {
1133            return Err(NamespaceError::Unsupported {
1134                message: format!(
1135                    "{} with non-empty table properties requires manifest_enabled=true",
1136                    operation
1137                ),
1138            }
1139            .into());
1140        }
1141        Ok(())
1142    }
1143
1144    async fn write_reader_to_table(
1145        &self,
1146        table_uri: &str,
1147        reader: Box<dyn arrow::record_batch::RecordBatchReader + Send>,
1148        mode: WriteMode,
1149        extra_storage_options: Option<HashMap<String, String>>,
1150    ) -> Result<Dataset> {
1151        // Insert and merge-insert request models do not carry request-level storage options,
1152        // so these writes intentionally use the namespace-level storage options only.
1153        let mut merged_storage_options = self.storage_options.clone().unwrap_or_default();
1154        if let Some(extra_storage_options) = extra_storage_options {
1155            merged_storage_options.extend(extra_storage_options);
1156        }
1157        let store_params = (!merged_storage_options.is_empty()).then(|| ObjectStoreParams {
1158            storage_options_accessor: Some(Arc::new(
1159                lance_io::object_store::StorageOptionsAccessor::with_static_options(
1160                    merged_storage_options,
1161                ),
1162            )),
1163            ..Default::default()
1164        });
1165
1166        let write_params = WriteParams {
1167            mode,
1168            store_params,
1169            session: self.session.clone(),
1170            ..Default::default()
1171        };
1172
1173        let dataset = Dataset::write(reader, table_uri, Some(write_params))
1174            .await
1175            .map_err(|e| NamespaceError::Internal {
1176                message: format!("Failed to write table at '{}': {}", table_uri, e),
1177            })?;
1178
1179        Ok(dataset)
1180    }
1181
1182    async fn list_table_versions_from_storage(
1183        &self,
1184        table_uri: &str,
1185        descending: bool,
1186        limit: Option<i32>,
1187    ) -> Result<Vec<TableVersion>> {
1188        let table_path = self.object_store_path_from_uri(table_uri)?;
1189        let versions_dir = table_path.clone().join(VERSIONS_DIR);
1190        let manifest_metas: Vec<_> = self
1191            .object_store
1192            .read_dir_all(&versions_dir, None)
1193            .try_collect()
1194            .await
1195            .map_err(|e| {
1196                lance_core::Error::from(NamespaceError::Internal {
1197                    message: format!(
1198                        "Failed to list manifest files for table at '{}': {}",
1199                        table_uri, e
1200                    ),
1201                })
1202            })?;
1203
1204        let is_v2_naming = manifest_metas
1205            .first()
1206            .is_some_and(|meta| meta.location.filename().is_some_and(|f| f.len() == 29));
1207
1208        let mut table_versions: Vec<TableVersion> = manifest_metas
1209            .into_iter()
1210            .filter_map(|meta| {
1211                let filename = meta.location.filename()?;
1212                let version_str = filename.strip_suffix(".manifest")?;
1213                if version_str.starts_with('d') {
1214                    return None;
1215                }
1216                let file_version: u64 = version_str.parse().ok()?;
1217
1218                let actual_version = if file_version > u64::MAX / 2 {
1219                    u64::MAX - file_version
1220                } else {
1221                    file_version
1222                };
1223
1224                Some(TableVersion {
1225                    version: actual_version as i64,
1226                    manifest_path: meta.location.to_string(),
1227                    manifest_size: Some(meta.size as i64),
1228                    e_tag: meta.e_tag,
1229                    timestamp_millis: Some(meta.last_modified.timestamp_millis()),
1230                    metadata: None,
1231                })
1232            })
1233            .collect();
1234
1235        let list_is_ordered = self.object_store.list_is_lexically_ordered;
1236
1237        let needs_sort = if list_is_ordered {
1238            if is_v2_naming {
1239                !descending
1240            } else {
1241                descending
1242            }
1243        } else {
1244            true
1245        };
1246
1247        if needs_sort {
1248            if descending {
1249                table_versions.sort_by(|a, b| b.version.cmp(&a.version));
1250            } else {
1251                table_versions.sort_by(|a, b| a.version.cmp(&b.version));
1252            }
1253        }
1254
1255        if let Some(limit) = limit {
1256            table_versions.truncate(limit as usize);
1257        }
1258
1259        Ok(table_versions)
1260    }
1261
1262    /// Internal describe_table implementation that doesn't record metrics.
1263    /// Used by both the public describe_table (which records metrics) and
1264    /// internal callers like resolve_table_location (which shouldn't).
1265    async fn describe_table_impl(
1266        &self,
1267        request: DescribeTableRequest,
1268    ) -> Result<DescribeTableResponse> {
1269        let is_root_level = request.id.as_ref().is_some_and(|id| id.len() == 1);
1270        let skip_manifest_for_root = self.dir_listing_enabled
1271            && is_root_level
1272            && !self.dir_listing_to_manifest_migration_enabled;
1273        if let Some(ref manifest_ns) = self.manifest_ns
1274            && !skip_manifest_for_root
1275        {
1276            match manifest_ns.describe_table(request.clone()).await {
1277                Ok(mut response) => {
1278                    if let Some(ref table_uri) = response.table_uri {
1279                        // For backwards compatibility, only skip vending credentials when explicitly set to false
1280                        let vend = request.vend_credentials.unwrap_or(true);
1281                        let identity = request.identity.as_deref();
1282                        response.storage_options = self
1283                            .get_storage_options_for_table(table_uri, vend, identity)
1284                            .await?;
1285                    }
1286                    // Set managed_versioning flag when table_version_tracking_enabled
1287                    if self.table_version_tracking_enabled {
1288                        response.managed_versioning = Some(true);
1289                    }
1290                    return Ok(response);
1291                }
1292                Err(_) if self.dir_listing_enabled && is_root_level => {
1293                    // Fall through to directory check only for single-level IDs
1294                }
1295                Err(e) => return Err(e),
1296            }
1297        }
1298
1299        let table_name = Self::table_name_from_id(&request.id)?;
1300        let table_id = Self::format_table_id_from_request(&request.id);
1301        let table_uri = self.table_full_uri(&table_name);
1302
1303        // Atomically check table existence and deregistration status
1304        let status = self.check_table_status(&table_name).await;
1305
1306        if !status.exists {
1307            return Err(NamespaceError::TableNotFound {
1308                message: table_id.clone(),
1309            }
1310            .into());
1311        }
1312
1313        if status.is_deregistered {
1314            return Err(NamespaceError::TableNotFound {
1315                message: format!("Table is deregistered: {}", table_id),
1316            }
1317            .into());
1318        }
1319
1320        let load_detailed_metadata = request.load_detailed_metadata.unwrap_or(false);
1321        let should_check_declared =
1322            load_detailed_metadata || request.check_declared.unwrap_or(false);
1323        // For backwards compatibility, only skip vending credentials when explicitly set to false
1324        let vend_credentials = request.vend_credentials.unwrap_or(true);
1325        let identity = request.identity.as_deref();
1326        let is_only_declared = if should_check_declared {
1327            if status.has_reserved_file {
1328                Some(!self.table_has_actual_manifests(&table_name).await?)
1329            } else {
1330                Some(false)
1331            }
1332        } else {
1333            None
1334        };
1335
1336        if !load_detailed_metadata {
1337            let storage_options = self
1338                .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1339                .await?;
1340            return Ok(DescribeTableResponse {
1341                table: Some(table_name),
1342                namespace: request.id.as_ref().map(|id| {
1343                    if id.len() > 1 {
1344                        id[..id.len() - 1].to_vec()
1345                    } else {
1346                        vec![]
1347                    }
1348                }),
1349                location: Some(table_uri.clone()),
1350                table_uri: Some(table_uri),
1351                storage_options,
1352                is_only_declared,
1353                managed_versioning: if self.table_version_tracking_enabled {
1354                    Some(true)
1355                } else {
1356                    None
1357                },
1358                ..Default::default()
1359            });
1360        }
1361
1362        if is_only_declared == Some(true) {
1363            let storage_options = self
1364                .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1365                .await?;
1366            return Ok(DescribeTableResponse {
1367                table: Some(table_name),
1368                namespace: request.id.as_ref().map(|id| {
1369                    if id.len() > 1 {
1370                        id[..id.len() - 1].to_vec()
1371                    } else {
1372                        vec![]
1373                    }
1374                }),
1375                location: Some(table_uri.clone()),
1376                table_uri: Some(table_uri),
1377                storage_options,
1378                is_only_declared,
1379                managed_versioning: if self.table_version_tracking_enabled {
1380                    Some(true)
1381                } else {
1382                    None
1383                },
1384                ..Default::default()
1385            });
1386        }
1387
1388        // Try to load the dataset to get real information
1389        // Use DatasetBuilder with storage options to support S3 with custom endpoints
1390        let mut builder = DatasetBuilder::from_uri(&table_uri);
1391        if let Some(opts) = &self.storage_options {
1392            builder = builder.with_storage_options(opts.clone());
1393        }
1394        if let Some(sess) = &self.session {
1395            builder = builder.with_session(sess.clone());
1396        }
1397        match builder.load().await {
1398            Ok(mut dataset) => {
1399                // If a specific version is requested, checkout that version
1400                if let Some(requested_version) = request.version {
1401                    dataset = dataset
1402                        .checkout_version(requested_version as u64)
1403                        .await
1404                        .map_err(|e| {
1405                            lance_core::Error::from(NamespaceError::TableVersionNotFound {
1406                                message: format!(
1407                                    "Version {} not found for table '{}': {}",
1408                                    requested_version, table_name, e
1409                                ),
1410                            })
1411                        })?;
1412                }
1413
1414                let version_info = dataset.version();
1415                let lance_schema = dataset.schema();
1416                let arrow_schema: arrow_schema::Schema = lance_schema.into();
1417                let json_schema = arrow_schema_to_json(&arrow_schema)?;
1418                let storage_options = self
1419                    .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1420                    .await?;
1421
1422                // Convert BTreeMap to HashMap for the response
1423                let metadata: std::collections::HashMap<String, String> =
1424                    version_info.metadata.into_iter().collect();
1425
1426                Ok(DescribeTableResponse {
1427                    table: Some(table_name),
1428                    namespace: request.id.as_ref().map(|id| {
1429                        if id.len() > 1 {
1430                            id[..id.len() - 1].to_vec()
1431                        } else {
1432                            vec![]
1433                        }
1434                    }),
1435                    version: Some(version_info.version as i64),
1436                    location: Some(table_uri.clone()),
1437                    table_uri: Some(table_uri),
1438                    schema: Some(Box::new(json_schema)),
1439                    storage_options,
1440                    metadata: Some(metadata),
1441                    is_only_declared,
1442                    managed_versioning: if self.table_version_tracking_enabled {
1443                        Some(true)
1444                    } else {
1445                        None
1446                    },
1447                    ..Default::default()
1448                })
1449            }
1450            Err(err) => {
1451                if manifest::ManifestNamespace::is_not_found_load_error(&err)
1452                    && is_only_declared == Some(true)
1453                {
1454                    let storage_options = self
1455                        .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1456                        .await?;
1457                    Ok(DescribeTableResponse {
1458                        table: Some(table_name),
1459                        namespace: request.id.as_ref().map(|id| {
1460                            if id.len() > 1 {
1461                                id[..id.len() - 1].to_vec()
1462                            } else {
1463                                vec![]
1464                            }
1465                        }),
1466                        location: Some(table_uri.clone()),
1467                        table_uri: Some(table_uri),
1468                        storage_options,
1469                        is_only_declared,
1470                        managed_versioning: if self.table_version_tracking_enabled {
1471                            Some(true)
1472                        } else {
1473                            None
1474                        },
1475                        ..Default::default()
1476                    })
1477                } else {
1478                    Err(NamespaceError::Internal {
1479                        message: format!(
1480                            "Table directory exists but cannot load dataset {}: {:?}",
1481                            table_name, err
1482                        ),
1483                    }
1484                    .into())
1485                }
1486            }
1487        }
1488    }
1489
1490    async fn load_dataset(
1491        &self,
1492        table_uri: &str,
1493        version: Option<i64>,
1494        operation: &str,
1495    ) -> Result<Dataset> {
1496        if let Some(version) = version
1497            && version < 0
1498        {
1499            return Err(NamespaceError::InvalidInput {
1500                message: format!(
1501                    "Table version for {} must be non-negative, got {}",
1502                    operation, version
1503                ),
1504            }
1505            .into());
1506        }
1507
1508        let mut builder = DatasetBuilder::from_uri(table_uri);
1509        if let Some(opts) = &self.storage_options {
1510            builder = builder.with_storage_options(opts.clone());
1511        }
1512        if let Some(sess) = &self.session {
1513            builder = builder.with_session(sess.clone());
1514        }
1515
1516        let dataset = builder.load().await.map_err(|e| {
1517            lance_core::Error::from(NamespaceError::TableNotFound {
1518                message: format!(
1519                    "Failed to open table at '{}' for {}: {}",
1520                    table_uri, operation, e
1521                ),
1522            })
1523        })?;
1524
1525        if let Some(version) = version {
1526            return dataset.checkout_version(version as u64).await.map_err(|e| {
1527                lance_core::Error::from(NamespaceError::TableVersionNotFound {
1528                    message: format!(
1529                        "Failed to checkout version {} for table at '{}' during {}: {}",
1530                        version, table_uri, operation, e
1531                    ),
1532                })
1533            });
1534        }
1535
1536        Ok(dataset)
1537    }
1538
1539    fn parse_index_type(index_type: &str) -> Result<IndexType> {
1540        match index_type.trim().to_ascii_uppercase().as_str() {
1541            "SCALAR" | "BTREE" => Ok(IndexType::BTree),
1542            "BITMAP" => Ok(IndexType::Bitmap),
1543            "LABEL_LIST" | "LABELLIST" => Ok(IndexType::LabelList),
1544            "INVERTED" | "FTS" => Ok(IndexType::Inverted),
1545            "NGRAM" => Ok(IndexType::NGram),
1546            "ZONEMAP" | "ZONE_MAP" => Ok(IndexType::ZoneMap),
1547            "BLOOMFILTER" | "BLOOM_FILTER" => Ok(IndexType::BloomFilter),
1548            "RTREE" | "R_TREE" => Ok(IndexType::RTree),
1549            "VECTOR" | "IVF_PQ" => Ok(IndexType::IvfPq),
1550            "IVF_FLAT" => Ok(IndexType::IvfFlat),
1551            "IVF_SQ" => Ok(IndexType::IvfSq),
1552            "IVF_RQ" => Ok(IndexType::IvfRq),
1553            "IVF_HNSW_FLAT" => Ok(IndexType::IvfHnswFlat),
1554            "IVF_HNSW_SQ" => Ok(IndexType::IvfHnswSq),
1555            "IVF_HNSW_PQ" => Ok(IndexType::IvfHnswPq),
1556            other => Err(NamespaceError::InvalidInput {
1557                message: format!("Unsupported index_type '{}'", other),
1558            }
1559            .into()),
1560        }
1561    }
1562
1563    fn parse_metric_type(distance_type: Option<&str>) -> Result<MetricType> {
1564        let distance_type = distance_type.unwrap_or("l2");
1565        MetricType::try_from(distance_type).map_err(|e| {
1566            lance_core::Error::from(NamespaceError::InvalidInput {
1567                message: format!(
1568                    "Unsupported distance_type '{}' for vector index: {}",
1569                    distance_type, e
1570                ),
1571            })
1572        })
1573    }
1574
1575    fn build_index_params(request: &CreateTableIndexRequest) -> Result<DirectoryIndexParams> {
1576        let index_type = Self::parse_index_type(&request.index_type)?;
1577        Ok(match index_type {
1578            IndexType::BTree => DirectoryIndexParams::Scalar {
1579                index_type,
1580                params: ScalarIndexParams::for_builtin(BuiltinIndexType::BTree),
1581            },
1582            IndexType::Bitmap => DirectoryIndexParams::Scalar {
1583                index_type,
1584                params: ScalarIndexParams::for_builtin(BuiltinIndexType::Bitmap),
1585            },
1586            IndexType::LabelList => DirectoryIndexParams::Scalar {
1587                index_type,
1588                params: ScalarIndexParams::for_builtin(BuiltinIndexType::LabelList),
1589            },
1590            IndexType::NGram => DirectoryIndexParams::Scalar {
1591                index_type,
1592                params: ScalarIndexParams::for_builtin(BuiltinIndexType::NGram),
1593            },
1594            IndexType::ZoneMap => DirectoryIndexParams::Scalar {
1595                index_type,
1596                params: ScalarIndexParams::for_builtin(BuiltinIndexType::ZoneMap),
1597            },
1598            IndexType::BloomFilter => DirectoryIndexParams::Scalar {
1599                index_type,
1600                params: ScalarIndexParams::for_builtin(BuiltinIndexType::BloomFilter),
1601            },
1602            IndexType::RTree => DirectoryIndexParams::Scalar {
1603                index_type,
1604                params: ScalarIndexParams::for_builtin(BuiltinIndexType::RTree),
1605            },
1606            IndexType::Inverted => {
1607                let mut params = InvertedIndexParams::default();
1608                if let Some(with_position) = request.with_position {
1609                    params = params.with_position(with_position);
1610                }
1611                if let Some(base_tokenizer) = &request.base_tokenizer {
1612                    params = params.base_tokenizer(base_tokenizer.clone());
1613                }
1614                if let Some(language) = &request.language {
1615                    params = params.language(language)?;
1616                }
1617                if let Some(max_token_length) = request.max_token_length {
1618                    if max_token_length < 0 {
1619                        return Err(NamespaceError::InvalidInput {
1620                            message: format!(
1621                                "FTS max_token_length must be non-negative, got {}",
1622                                max_token_length
1623                            ),
1624                        }
1625                        .into());
1626                    }
1627                    params = params.max_token_length(Some(max_token_length as usize));
1628                }
1629                if let Some(lower_case) = request.lower_case {
1630                    params = params.lower_case(lower_case);
1631                }
1632                if let Some(stem) = request.stem {
1633                    params = params.stem(stem);
1634                }
1635                if let Some(remove_stop_words) = request.remove_stop_words {
1636                    params = params.remove_stop_words(remove_stop_words);
1637                }
1638                if let Some(ascii_folding) = request.ascii_folding {
1639                    params = params.ascii_folding(ascii_folding);
1640                }
1641                DirectoryIndexParams::Inverted(params)
1642            }
1643            IndexType::IvfFlat => DirectoryIndexParams::Vector {
1644                index_type,
1645                params: VectorIndexParams::with_ivf_flat_params(
1646                    Self::parse_metric_type(request.distance_type.as_deref())?,
1647                    IvfBuildParams::default(),
1648                ),
1649            },
1650            IndexType::IvfPq => DirectoryIndexParams::Vector {
1651                index_type,
1652                params: VectorIndexParams::with_ivf_pq_params(
1653                    Self::parse_metric_type(request.distance_type.as_deref())?,
1654                    IvfBuildParams::default(),
1655                    PQBuildParams::default(),
1656                ),
1657            },
1658            IndexType::IvfSq => DirectoryIndexParams::Vector {
1659                index_type,
1660                params: VectorIndexParams::with_ivf_sq_params(
1661                    Self::parse_metric_type(request.distance_type.as_deref())?,
1662                    IvfBuildParams::default(),
1663                    SQBuildParams::default(),
1664                ),
1665            },
1666            IndexType::IvfRq => DirectoryIndexParams::Vector {
1667                index_type,
1668                params: VectorIndexParams::with_ivf_rq_params(
1669                    Self::parse_metric_type(request.distance_type.as_deref())?,
1670                    IvfBuildParams::default(),
1671                    RQBuildParams::default(),
1672                ),
1673            },
1674            IndexType::IvfHnswFlat => DirectoryIndexParams::Vector {
1675                index_type,
1676                params: VectorIndexParams::ivf_hnsw(
1677                    Self::parse_metric_type(request.distance_type.as_deref())?,
1678                    IvfBuildParams::default(),
1679                    HnswBuildParams::default(),
1680                ),
1681            },
1682            IndexType::IvfHnswSq => DirectoryIndexParams::Vector {
1683                index_type,
1684                params: VectorIndexParams::with_ivf_hnsw_sq_params(
1685                    Self::parse_metric_type(request.distance_type.as_deref())?,
1686                    IvfBuildParams::default(),
1687                    HnswBuildParams::default(),
1688                    SQBuildParams::default(),
1689                ),
1690            },
1691            IndexType::IvfHnswPq => DirectoryIndexParams::Vector {
1692                index_type,
1693                params: VectorIndexParams::with_ivf_hnsw_pq_params(
1694                    Self::parse_metric_type(request.distance_type.as_deref())?,
1695                    IvfBuildParams::default(),
1696                    HnswBuildParams::default(),
1697                    PQBuildParams::default(),
1698                ),
1699            },
1700            other => {
1701                return Err(NamespaceError::InvalidInput {
1702                    message: format!("Unsupported index type for namespace API: {}", other),
1703                }
1704                .into());
1705            }
1706        })
1707    }
1708
1709    fn paginate_indices(
1710        indices: &mut Vec<IndexContent>,
1711        page_token: Option<String>,
1712        limit: Option<i32>,
1713    ) -> Option<String> {
1714        indices.sort_by(|a, b| a.index_name.cmp(&b.index_name));
1715
1716        if let Some(start_after) = page_token {
1717            if let Some(index) = indices
1718                .iter()
1719                .position(|index| index.index_name.as_str() > start_after.as_str())
1720            {
1721                indices.drain(0..index);
1722            } else {
1723                indices.clear();
1724            }
1725        }
1726
1727        let mut next_page_token = None;
1728        if let Some(limit) = limit
1729            && limit >= 0
1730        {
1731            let limit = limit as usize;
1732            if limit > 0 && indices.len() > limit {
1733                next_page_token = Some(indices[limit - 1].index_name.clone());
1734            }
1735            indices.truncate(limit);
1736        }
1737        if indices.is_empty() {
1738            None
1739        } else {
1740            next_page_token
1741        }
1742    }
1743
1744    fn transaction_operation_name(transaction: &Transaction) -> String {
1745        match &transaction.operation {
1746            Operation::CreateIndex {
1747                new_indices,
1748                removed_indices,
1749            } if new_indices.is_empty() && !removed_indices.is_empty() => "DropIndex".to_string(),
1750            _ => transaction.operation.to_string(),
1751        }
1752    }
1753
1754    fn transaction_response(
1755        version: u64,
1756        transaction: &Transaction,
1757    ) -> DescribeTransactionResponse {
1758        let mut properties = transaction
1759            .transaction_properties
1760            .as_ref()
1761            .map(|properties| (**properties).clone())
1762            .unwrap_or_default();
1763        properties.insert("uuid".to_string(), transaction.uuid.clone());
1764        properties.insert("version".to_string(), version.to_string());
1765        properties.insert(
1766            "read_version".to_string(),
1767            transaction.read_version.to_string(),
1768        );
1769        properties.insert(
1770            "operation".to_string(),
1771            Self::transaction_operation_name(transaction),
1772        );
1773        if let Some(tag) = &transaction.tag {
1774            properties.insert("tag".to_string(), tag.clone());
1775        }
1776
1777        DescribeTransactionResponse {
1778            status: "SUCCEEDED".to_string(),
1779            properties: Some(properties),
1780        }
1781    }
1782
1783    fn describe_table_index_stats_response(
1784        stats: &serde_json::Value,
1785    ) -> DescribeTableIndexStatsResponse {
1786        let get_i64 = |key: &str| {
1787            stats.get(key).and_then(|value| {
1788                value
1789                    .as_i64()
1790                    .or_else(|| value.as_u64().and_then(|v| i64::try_from(v).ok()))
1791            })
1792        };
1793
1794        DescribeTableIndexStatsResponse {
1795            distance_type: stats
1796                .get("distance_type")
1797                .and_then(|value| value.as_str())
1798                .map(str::to_string),
1799            index_type: stats
1800                .get("index_type")
1801                .and_then(|value| value.as_str())
1802                .map(str::to_string),
1803            num_indexed_rows: get_i64("num_indexed_rows"),
1804            num_unindexed_rows: get_i64("num_unindexed_rows"),
1805            num_indices: get_i64("num_indices").and_then(|value| i32::try_from(value).ok()),
1806        }
1807    }
1808
1809    /// When transaction_id is not parseable as a version number (i.e. it's a UUID),
1810    /// find_transaction iterates through every version in reverse, reading each
1811    /// transaction file from storage. For tables with many versions this will
1812    /// be extremely slow — each iteration is a separate I/O call.
1813    async fn find_transaction(&self, dataset: &Dataset, id: &str) -> Result<(u64, Transaction)> {
1814        if let Ok(version) = id.parse::<u64>() {
1815            let transaction = dataset
1816                .read_transaction_by_version(version)
1817                .await
1818                .map_err(|e| {
1819                    lance_core::Error::from(NamespaceError::TransactionNotFound {
1820                        message: format!(
1821                            "Failed to read transaction for version {}: {}",
1822                            version, e
1823                        ),
1824                    })
1825                })?
1826                .ok_or_else(|| {
1827                    lance_core::Error::from(NamespaceError::TransactionNotFound {
1828                        message: format!("version {}", version),
1829                    })
1830                })?;
1831            return Ok((version, transaction));
1832        }
1833
1834        let versions = dataset.versions().await.map_err(|e| {
1835            lance_core::Error::from(NamespaceError::Internal {
1836                message: format!(
1837                    "Failed to list table versions while resolving transaction '{}': {}",
1838                    id, e
1839                ),
1840            })
1841        })?;
1842
1843        for version in versions.into_iter().rev() {
1844            if let Some(transaction) = dataset
1845                .read_transaction_by_version(version.version)
1846                .await
1847                .map_err(|e| {
1848                    lance_core::Error::from(NamespaceError::Internal {
1849                        message: format!(
1850                            "Failed to read transaction for version {} while resolving '{}': {}",
1851                            version.version, id, e
1852                        ),
1853                    })
1854                })?
1855                && transaction.uuid == id
1856            {
1857                return Ok((version.version, transaction));
1858            }
1859        }
1860
1861        Err(NamespaceError::TransactionNotFound {
1862            message: id.to_string(),
1863        }
1864        .into())
1865    }
1866
1867    fn table_full_uri(&self, table_name: &str) -> String {
1868        format!("{}/{}.lance", &self.root, table_name)
1869    }
1870
1871    /// Get the object store path for a table (relative to base_path)
1872    fn table_path(&self, table_name: &str) -> Path {
1873        self.base_path
1874            .clone()
1875            .join(format!("{}.lance", table_name).as_str())
1876    }
1877
1878    /// Get the reserved file path for a table
1879    fn table_reserved_file_path(&self, table_name: &str) -> Path {
1880        self.base_path
1881            .clone()
1882            .join(format!("{}.lance", table_name).as_str())
1883            .join(".lance-reserved")
1884    }
1885
1886    /// Get the deregistered marker file path for a table
1887    fn table_deregistered_file_path(&self, table_name: &str) -> Path {
1888        self.base_path
1889            .clone()
1890            .join(format!("{}.lance", table_name).as_str())
1891            .join(".lance-deregistered")
1892    }
1893
1894    /// Atomically check table existence and deregistration status.
1895    ///
1896    /// This performs a single directory listing to get a consistent snapshot of the
1897    /// table's state, avoiding race conditions between checking existence and
1898    /// checking deregistration status.
1899    pub(crate) async fn check_table_status(&self, table_name: &str) -> TableStatus {
1900        let table_path = self.table_path(table_name);
1901        match self.object_store.read_dir(table_path).await {
1902            Ok(entries) => {
1903                let exists = !entries.is_empty();
1904                let is_deregistered = entries.iter().any(|e| e.ends_with(".lance-deregistered"));
1905                let has_reserved_file = entries.iter().any(|e| e.ends_with(".lance-reserved"));
1906                TableStatus {
1907                    exists,
1908                    is_deregistered,
1909                    has_reserved_file,
1910                }
1911            }
1912            Err(_) => TableStatus {
1913                exists: false,
1914                is_deregistered: false,
1915                has_reserved_file: false,
1916            },
1917        }
1918    }
1919
1920    async fn put_marker_file_atomic(
1921        &self,
1922        path: &Path,
1923        file_description: &str,
1924    ) -> std::result::Result<(), String> {
1925        let put_opts = PutOptions {
1926            mode: PutMode::Create,
1927            ..Default::default()
1928        };
1929
1930        match self
1931            .object_store
1932            .inner
1933            .put_opts(path, bytes::Bytes::new().into(), put_opts)
1934            .await
1935        {
1936            Ok(_) => Ok(()),
1937            Err(ObjectStoreError::AlreadyExists { .. })
1938            | Err(ObjectStoreError::Precondition { .. }) => {
1939                Err(format!("{} already exists", file_description))
1940            }
1941            Err(e) => Err(format!("Failed to create {}: {:?}", file_description, e)),
1942        }
1943    }
1944
1945    /// Get storage options for a table, using credential vending if configured.
1946    ///
1947    /// If credential vendor properties are configured and the table location matches
1948    /// a supported cloud provider, this will create an appropriate vendor and vend
1949    /// temporary credentials scoped to the table location. Otherwise, returns the
1950    /// static storage options.
1951    ///
1952    /// The vendor type is auto-selected based on the table URI:
1953    /// - `s3://` locations use AWS STS AssumeRole
1954    /// - `gs://` locations use GCP OAuth2 tokens
1955    /// - `az://` locations use Azure SAS tokens
1956    ///
1957    /// The permission level (Read, Write, Admin) is configured at namespace
1958    /// initialization time via the `credential_vendor_permission` property.
1959    ///
1960    /// # Arguments
1961    ///
1962    /// * `table_uri` - The full URI of the table
1963    /// * `identity` - Optional identity from the request for identity-based credential vending
1964    async fn get_storage_options_for_table(
1965        &self,
1966        table_uri: &str,
1967        vend_credentials: bool,
1968        identity: Option<&Identity>,
1969    ) -> Result<Option<HashMap<String, String>>> {
1970        if vend_credentials && let Some(ref vendor) = self.credential_vendor {
1971            let vended = vendor.vend_credentials(table_uri, identity).await?;
1972            return Ok(Some(vended.storage_options));
1973        }
1974        // When vend_input_storage_options is enabled and no credential vendor is configured,
1975        // return the input storage options. This is useful for testing.
1976        if self.vend_input_storage_options {
1977            let mut options = self.storage_options.clone().unwrap_or_default();
1978            // Add expires_at_millis if refresh interval is configured
1979            if let Some(refresh_interval_millis) =
1980                self.vend_input_storage_options_refresh_interval_millis
1981            {
1982                let now_millis = std::time::SystemTime::now()
1983                    .duration_since(std::time::UNIX_EPOCH)
1984                    .unwrap()
1985                    .as_millis() as u64;
1986                let expires_at_millis = now_millis + refresh_interval_millis;
1987                options.insert(
1988                    "expires_at_millis".to_string(),
1989                    expires_at_millis.to_string(),
1990                );
1991            }
1992            return Ok(Some(options));
1993        }
1994        // When no credential vendor is configured, return None to avoid
1995        // leaking the namespace's own static credentials to clients.
1996        Ok(None)
1997    }
1998
1999    /// Migrate directory-based tables to the manifest.
2000    ///
2001    /// This is a one-time migration operation that:
2002    /// 1. Scans the directory for existing `.lance` tables
2003    /// 2. Registers any unmigrated tables in the manifest
2004    /// 3. Returns the count of tables that were migrated
2005    ///
2006    /// This method is safe to run multiple times - it will skip tables that are already
2007    /// registered in the manifest.
2008    ///
2009    /// # Usage
2010    ///
2011    /// After creating tables in directory-only mode or dual mode, you can migrate them
2012    /// to the manifest to enable manifest-only mode:
2013    ///
2014    /// ```no_run
2015    /// # use lance_namespace_impls::DirectoryNamespaceBuilder;
2016    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
2017    /// // Create namespace with dual mode (manifest + directory listing)
2018    /// let namespace = DirectoryNamespaceBuilder::new("/path/to/data")
2019    ///     .manifest_enabled(true)
2020    ///     .dir_listing_enabled(true)
2021    ///     .build()
2022    ///     .await?;
2023    ///
2024    /// // ... tables are created and used ...
2025    ///
2026    /// // Migrate existing directory tables to manifest
2027    /// let migrated_count = namespace.migrate().await?;
2028    /// println!("Migrated {} tables", migrated_count);
2029    ///
2030    /// // Now you can disable directory listing for better performance:
2031    /// // (requires rebuilding the namespace)
2032    /// let namespace = DirectoryNamespaceBuilder::new("/path/to/data")
2033    ///     .manifest_enabled(true)
2034    ///     .dir_listing_enabled(false)  // All tables now in manifest
2035    ///     .build()
2036    ///     .await?;
2037    /// # Ok(())
2038    /// # }
2039    /// ```
2040    ///
2041    /// # Returns
2042    ///
2043    /// Returns the number of tables that were migrated to the manifest.
2044    ///
2045    /// # Errors
2046    ///
2047    /// Returns an error if:
2048    /// - Manifest is not enabled
2049    /// - Directory listing fails
2050    /// - Manifest registration fails
2051    pub async fn migrate(&self) -> Result<usize> {
2052        // We only care about tables in the root namespace
2053        let Some(ref manifest_ns) = self.manifest_ns else {
2054            return Ok(0); // No manifest, nothing to migrate
2055        };
2056
2057        // Get all table locations already in the manifest
2058        let manifest_locations = manifest_ns.list_manifest_table_locations().await?;
2059
2060        // Get all tables from directory and skip declared-only tables that have not
2061        // written any actual version manifests yet.
2062        let dir_tables = self
2063            .filter_declared_tables(self.list_directory_tables().await?, false)
2064            .await?;
2065
2066        // Register each directory table that doesn't have an overlapping location
2067        // If a directory name already exists in the manifest,
2068        // that means the table must have already been migrated or created
2069        // in the manifest, so we can skip it.
2070        let mut migrated_count = 0;
2071        for table_name in dir_tables {
2072            // For root namespace tables, the directory name is "table_name.lance"
2073            let dir_name = format!("{}.lance", table_name);
2074            if !manifest_locations.contains(&dir_name) {
2075                manifest_ns.register_table(&table_name, dir_name).await?;
2076                migrated_count += 1;
2077            }
2078        }
2079
2080        Ok(migrated_count)
2081    }
2082
2083    /// Delete physical manifest files for the given table version ranges (best-effort).
2084    ///
2085    /// This helper is used by `batch_delete_table_versions` in both the manifest-enabled
2086    /// and non-manifest paths. It resolves each table's storage location, computes the
2087    /// version file paths, and attempts to delete them. Errors are logged (best-effort)
2088    /// when `best_effort` is true, or returned immediately when false.
2089    ///
2090    /// Returns the number of files successfully deleted.
2091    async fn delete_physical_version_files(
2092        &self,
2093        table_entries: &[TableDeleteEntry],
2094        best_effort: bool,
2095    ) -> Result<i64> {
2096        let mut deleted_count = 0i64;
2097        for te in table_entries {
2098            let table_uri = self.resolve_table_location(&te.table_id).await?;
2099            let table_path = self.object_store_path_from_uri(&table_uri)?;
2100            let versions_dir_path = table_path.clone().join(VERSIONS_DIR);
2101
2102            for (start, end) in &te.ranges {
2103                for version in *start..=*end {
2104                    let version_path = versions_dir_path
2105                        .clone()
2106                        .join(format!("{}.manifest", version as u64));
2107                    match self.object_store.inner.delete(&version_path).await {
2108                        Ok(_) => {
2109                            deleted_count += 1;
2110                        }
2111                        Err(object_store::Error::NotFound { .. }) => {}
2112                        Err(e) => {
2113                            if best_effort {
2114                                log::warn!(
2115                                    "Failed to delete manifest file for version {} of table {:?}: {:?}",
2116                                    version,
2117                                    te.table_id,
2118                                    e
2119                                );
2120                            } else {
2121                                return Err(NamespaceError::Internal {
2122                                    message: format!(
2123                                        "Failed to delete version {} for table at '{}': {}",
2124                                        version, table_uri, e
2125                                    ),
2126                                }
2127                                .into());
2128                            }
2129                        }
2130                    }
2131                }
2132            }
2133        }
2134        Ok(deleted_count)
2135    }
2136
2137    /// Apply all query parameters from a `QueryTableRequest`-like source onto a `Scanner`.
2138    ///
2139    /// This covers vector search, filters, column projection, limits, and ANN tuning knobs so
2140    /// that `explain_table_query_plan` / `analyze_table_query_plan` produce an accurate plan.
2141    #[allow(clippy::too_many_arguments)]
2142    fn apply_query_params_to_scanner(
2143        scanner: &mut Scanner,
2144        filter: Option<&str>,
2145        columns: Option<&QueryTableRequestColumns>,
2146        vector_column: Option<&str>,
2147        vector: &QueryTableRequestVector,
2148        k: i32,
2149        offset: Option<i32>,
2150        prefilter: Option<bool>,
2151        bypass_vector_index: Option<bool>,
2152        nprobes: Option<i32>,
2153        ef: Option<i32>,
2154        refine_factor: Option<i32>,
2155        distance_type: Option<&str>,
2156        fast_search_flag: Option<bool>,
2157        with_row_id: Option<bool>,
2158        lower_bound: Option<f32>,
2159        upper_bound: Option<f32>,
2160        operation: &str,
2161    ) -> Result<()> {
2162        // prefilter must be set before nearest() so the fragment-scan guard sees it.
2163        if let Some(pf) = prefilter {
2164            scanner.prefilter(pf);
2165        }
2166
2167        if let Some(filter) = filter {
2168            scanner.filter(filter).map_err(|e| {
2169                Error::invalid_input_source(
2170                    format!("Invalid filter expression for {}: {}", operation, e).into(),
2171                )
2172            })?;
2173        }
2174
2175        if let Some(cols) = columns {
2176            if let Some(ref names) = cols.column_names {
2177                scanner.project(names.as_slice()).map_err(|e| {
2178                    Error::invalid_input_source(
2179                        format!("Invalid column projection for {}: {}", operation, e).into(),
2180                    )
2181                })?;
2182            } else if let Some(ref aliases) = cols.column_aliases {
2183                // aliases maps output_alias -> source_column
2184                let pairs: Vec<(&str, &str)> = aliases
2185                    .iter()
2186                    .map(|(alias, src)| (alias.as_str(), src.as_str()))
2187                    .collect();
2188                scanner.project_with_transform(&pairs).map_err(|e| {
2189                    Error::invalid_input_source(
2190                        format!("Invalid column aliases for {}: {}", operation, e).into(),
2191                    )
2192                })?;
2193            }
2194        }
2195
2196        // Resolve query vector: prefer single_vector, fall back to first row of multi_vector.
2197        let query_vec: Option<Vec<f32>> = vector
2198            .single_vector
2199            .as_ref()
2200            .filter(|v| !v.is_empty())
2201            .cloned()
2202            .or_else(|| {
2203                vector
2204                    .multi_vector
2205                    .as_ref()
2206                    .and_then(|mv| mv.first())
2207                    .filter(|v| !v.is_empty())
2208                    .cloned()
2209            });
2210
2211        if let Some(q_vec) = query_vec {
2212            let col = vector_column.unwrap_or("vector");
2213            let q = Arc::new(Float32Array::from(q_vec));
2214            scanner
2215                .nearest(col, q.as_ref(), k.max(1) as usize)
2216                .map_err(|e| {
2217                    Error::invalid_input_source(
2218                        format!("Invalid vector query for {}: {}", operation, e).into(),
2219                    )
2220                })?;
2221
2222            // ANN parameters — must be applied after nearest().
2223            if let Some(n) = nprobes {
2224                scanner.nprobes(n.max(1) as usize);
2225            }
2226            if let Some(e) = ef {
2227                scanner.ef(e.max(1) as usize);
2228            }
2229            if let Some(rf) = refine_factor {
2230                scanner.refine(rf.max(0) as u32);
2231            }
2232            // bypass_vector_index and fast_search are mutually exclusive; apply in order.
2233            if let Some(true) = bypass_vector_index {
2234                scanner.use_index(false);
2235            }
2236            if let Some(true) = fast_search_flag {
2237                scanner.fast_search();
2238            }
2239            if lower_bound.is_some() || upper_bound.is_some() {
2240                scanner.distance_range(lower_bound, upper_bound);
2241            }
2242            if let Some(dt) = distance_type {
2243                let metric = Self::parse_metric_type(Some(dt))?;
2244                scanner.distance_metric(metric);
2245            }
2246            // Apply offset on top of the k nearest results.
2247            if let Some(off) = offset.filter(|&o| o > 0) {
2248                scanner.limit(None, Some(off as i64)).map_err(|e| {
2249                    Error::invalid_input_source(
2250                        format!("Invalid offset for {}: {}", operation, e).into(),
2251                    )
2252                })?;
2253            }
2254        } else {
2255            // Scalar (non-vector) query: treat k as a row LIMIT.
2256            let limit = if k > 0 { Some(k as i64) } else { None };
2257            scanner
2258                .limit(limit, offset.map(|o| o as i64))
2259                .map_err(|e| {
2260                    Error::invalid_input_source(
2261                        format!("Invalid limit/offset for {}: {}", operation, e).into(),
2262                    )
2263                })?;
2264        }
2265
2266        if let Some(true) = with_row_id {
2267            scanner.with_row_id();
2268        }
2269
2270        Ok(())
2271    }
2272
2273    /// Retrieve a snapshot of operation metrics.
2274    ///
2275    /// Returns a HashMap where keys are operation names (e.g., "list_tables", "describe_table")
2276    /// and values are the number of times each operation was called.
2277    ///
2278    /// Returns an empty HashMap if `ops_metrics_enabled` was false when building the namespace.
2279    pub fn retrieve_ops_metrics(&self) -> HashMap<String, u64> {
2280        self.ops_metrics
2281            .as_ref()
2282            .map(|m| m.retrieve())
2283            .unwrap_or_default()
2284    }
2285
2286    /// Reset all operation metrics counters to zero.
2287    ///
2288    /// Does nothing if `ops_metrics_enabled` was false when building the namespace.
2289    pub fn reset_ops_metrics(&self) {
2290        if let Some(ref metrics) = self.ops_metrics {
2291            metrics.reset();
2292        }
2293    }
2294
2295    /// Increment the counter for an operation.
2296    fn record_op(&self, operation: &str) {
2297        if let Some(ref metrics) = self.ops_metrics {
2298            metrics.increment(operation);
2299        }
2300    }
2301}
2302
2303#[async_trait]
2304impl LanceNamespace for DirectoryNamespace {
2305    async fn list_namespaces(
2306        &self,
2307        request: ListNamespacesRequest,
2308    ) -> Result<ListNamespacesResponse> {
2309        self.record_op("list_namespaces");
2310        if let Some(ref manifest_ns) = self.manifest_ns {
2311            return manifest_ns.list_namespaces(request).await;
2312        }
2313
2314        Self::validate_root_namespace_id(&request.id)?;
2315        Ok(ListNamespacesResponse::new(vec![]))
2316    }
2317
2318    async fn describe_namespace(
2319        &self,
2320        request: DescribeNamespaceRequest,
2321    ) -> Result<DescribeNamespaceResponse> {
2322        self.record_op("describe_namespace");
2323        if let Some(ref manifest_ns) = self.manifest_ns {
2324            return manifest_ns.describe_namespace(request).await;
2325        }
2326
2327        Self::validate_root_namespace_id(&request.id)?;
2328        #[allow(clippy::needless_update)]
2329        Ok(DescribeNamespaceResponse {
2330            properties: Some(HashMap::new()),
2331            ..Default::default()
2332        })
2333    }
2334
2335    async fn create_namespace(
2336        &self,
2337        request: CreateNamespaceRequest,
2338    ) -> Result<CreateNamespaceResponse> {
2339        self.record_op("create_namespace");
2340        if let Some(ref manifest_ns) = self.manifest_ns {
2341            return manifest_ns.create_namespace(request).await;
2342        }
2343
2344        if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2345            return Err(NamespaceError::NamespaceAlreadyExists {
2346                message: "root namespace".to_string(),
2347            }
2348            .into());
2349        }
2350
2351        Err(NamespaceError::Unsupported {
2352            message: "Child namespaces are only supported when manifest mode is enabled"
2353                .to_string(),
2354        }
2355        .into())
2356    }
2357
2358    async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
2359        self.record_op("drop_namespace");
2360        if let Some(ref manifest_ns) = self.manifest_ns {
2361            return manifest_ns.drop_namespace(request).await;
2362        }
2363
2364        if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2365            return Err(NamespaceError::InvalidInput {
2366                message: "Root namespace cannot be dropped".to_string(),
2367            }
2368            .into());
2369        }
2370
2371        Err(NamespaceError::Unsupported {
2372            message: "Child namespaces are only supported when manifest mode is enabled"
2373                .to_string(),
2374        }
2375        .into())
2376    }
2377
2378    async fn namespace_exists(&self, request: NamespaceExistsRequest) -> Result<()> {
2379        self.record_op("namespace_exists");
2380        if let Some(ref manifest_ns) = self.manifest_ns {
2381            return manifest_ns.namespace_exists(request).await;
2382        }
2383
2384        if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2385            return Ok(());
2386        }
2387
2388        Err(NamespaceError::NamespaceNotFound {
2389            message: "Child namespaces are only supported when manifest mode is enabled"
2390                .to_string(),
2391        }
2392        .into())
2393    }
2394
2395    async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
2396        self.record_op("list_tables");
2397        // Validate that namespace ID is provided
2398        let namespace_id = request.id.as_ref().ok_or_else(|| {
2399            lance_core::Error::from(NamespaceError::InvalidInput {
2400                message: "Namespace ID is required".to_string(),
2401            })
2402        })?;
2403
2404        // For child namespaces, always delegate to manifest (if enabled)
2405        if !namespace_id.is_empty() {
2406            if let Some(ref manifest_ns) = self.manifest_ns {
2407                return manifest_ns.list_tables(request).await;
2408            }
2409            return Err(NamespaceError::Unsupported {
2410                message: "Child namespaces are only supported when manifest mode is enabled"
2411                    .to_string(),
2412            }
2413            .into());
2414        }
2415
2416        // When only manifest is enabled (no directory listing), delegate directly to manifest
2417        if let Some(ref manifest_ns) = self.manifest_ns
2418            && !self.dir_listing_enabled
2419        {
2420            return manifest_ns.list_tables(request).await;
2421        }
2422
2423        // When both manifest and directory listing are enabled with migration mode,
2424        // we need to merge and deduplicate
2425        let mut tables = if self.manifest_ns.is_some()
2426            && self.dir_listing_enabled
2427            && self.dir_listing_to_manifest_migration_enabled
2428        {
2429            // Get all manifest table locations (for deduplication)
2430            let manifest_locations = if let Some(ref manifest_ns) = self.manifest_ns {
2431                manifest_ns.list_manifest_table_locations().await?
2432            } else {
2433                std::collections::HashSet::new()
2434            };
2435
2436            // Get all manifest tables (without pagination for merging)
2437            let mut manifest_request = request.clone();
2438            manifest_request.limit = None;
2439            manifest_request.page_token = None;
2440            let manifest_tables = if let Some(ref manifest_ns) = self.manifest_ns {
2441                let manifest_response = manifest_ns.list_tables(manifest_request).await?;
2442                manifest_response.tables
2443            } else {
2444                vec![]
2445            };
2446
2447            // Start with all manifest table names
2448            // Add directory tables that aren't already in the manifest (by location)
2449            let mut all_tables: Vec<String> = manifest_tables;
2450            let dir_tables = self.list_directory_tables().await?;
2451            for table_name in dir_tables {
2452                // Check if this table's location is already in the manifest
2453                // Manifest stores full URIs, so we need to check both formats
2454                let full_location = format!("{}/{}.lance", self.root, table_name);
2455                let relative_location = format!("{}.lance", table_name);
2456                if !manifest_locations.contains(&full_location)
2457                    && !manifest_locations.contains(&relative_location)
2458                {
2459                    all_tables.push(table_name);
2460                }
2461            }
2462
2463            all_tables
2464        } else {
2465            self.list_directory_tables().await?
2466        };
2467
2468        tables = self
2469            .filter_declared_tables(tables, request.include_declared.unwrap_or(true))
2470            .await?;
2471
2472        // Apply sorting and pagination
2473        let next_page_token =
2474            Self::apply_pagination(&mut tables, request.page_token, request.limit);
2475        let mut response = ListTablesResponse::new(tables);
2476        response.page_token = next_page_token;
2477        Ok(response)
2478    }
2479
2480    async fn describe_table(&self, request: DescribeTableRequest) -> Result<DescribeTableResponse> {
2481        self.record_op("describe_table");
2482        self.describe_table_impl(request).await
2483    }
2484
2485    async fn table_exists(&self, request: TableExistsRequest) -> Result<()> {
2486        self.record_op("table_exists");
2487        let is_root_level = request.id.as_ref().is_some_and(|id| id.len() == 1);
2488        let skip_manifest_for_root = self.dir_listing_enabled
2489            && is_root_level
2490            && !self.dir_listing_to_manifest_migration_enabled;
2491        if let Some(ref manifest_ns) = self.manifest_ns
2492            && !skip_manifest_for_root
2493        {
2494            match manifest_ns.table_exists(request.clone()).await {
2495                Ok(()) => return Ok(()),
2496                Err(_) if self.dir_listing_enabled && is_root_level => {
2497                    // Fall through to directory check only for single-level IDs
2498                }
2499                Err(e) => return Err(e),
2500            }
2501        }
2502
2503        let table_name = Self::table_name_from_id(&request.id)?;
2504        let table_id = Self::format_table_id_from_request(&request.id);
2505
2506        // Atomically check table existence and deregistration status
2507        let status = self.check_table_status(&table_name).await;
2508
2509        if !status.exists {
2510            return Err(NamespaceError::TableNotFound {
2511                message: table_id.clone(),
2512            }
2513            .into());
2514        }
2515
2516        if status.is_deregistered {
2517            return Err(NamespaceError::TableNotFound {
2518                message: format!("Table is deregistered: {}", table_id),
2519            }
2520            .into());
2521        }
2522
2523        Ok(())
2524    }
2525
2526    async fn drop_table(&self, request: DropTableRequest) -> Result<DropTableResponse> {
2527        self.record_op("drop_table");
2528        if let Some(ref manifest_ns) = self.manifest_ns {
2529            return manifest_ns.drop_table(request).await;
2530        }
2531
2532        let table_name = Self::table_name_from_id(&request.id)?;
2533        let table_uri = self.table_full_uri(&table_name);
2534        let table_path = self.table_path(&table_name);
2535
2536        self.object_store
2537            .remove_dir_all(table_path)
2538            .await
2539            .map_err(|e| {
2540                lance_core::Error::from(NamespaceError::Internal {
2541                    message: format!("Failed to drop table {}: {:?}", table_name, e),
2542                })
2543            })?;
2544
2545        Ok(DropTableResponse {
2546            id: request.id,
2547            location: Some(table_uri),
2548            ..Default::default()
2549        })
2550    }
2551
2552    async fn create_table(
2553        &self,
2554        request: CreateTableRequest,
2555        request_data: Bytes,
2556    ) -> Result<CreateTableResponse> {
2557        self.record_op("create_table");
2558        if let Some(ref manifest_ns) = self.manifest_ns {
2559            return manifest_ns.create_table(request, request_data).await;
2560        }
2561
2562        Self::validate_dir_only_properties(request.properties.as_ref(), "create_table")?;
2563
2564        let table_name = Self::table_name_from_id(&request.id)?;
2565        let table_uri = self.table_full_uri(&table_name);
2566        let status = self.check_table_status(&table_name).await;
2567        let (reader, _num_rows) =
2568            Self::ipc_reader_from_request_data(&request_data, "create_table")?;
2569
2570        if status.exists && self.table_has_actual_manifests(&table_name).await? {
2571            return Err(NamespaceError::TableAlreadyExists {
2572                message: table_name,
2573            }
2574            .into());
2575        }
2576
2577        let write_result = self
2578            .write_reader_to_table(
2579                &table_uri,
2580                reader,
2581                WriteMode::Create,
2582                request.storage_options.clone(),
2583            )
2584            .await;
2585        if let Err(err) = write_result {
2586            if self.table_uri_has_actual_manifests(&table_uri).await? {
2587                return Err(NamespaceError::TableAlreadyExists {
2588                    message: table_name,
2589                }
2590                .into());
2591            }
2592            return Err(err);
2593        }
2594        Ok(CreateTableResponse {
2595            version: Some(1),
2596            location: Some(table_uri),
2597            storage_options: self.storage_options.clone(),
2598            properties: request.properties,
2599            ..Default::default()
2600        })
2601    }
2602
2603    async fn declare_table(&self, request: DeclareTableRequest) -> Result<DeclareTableResponse> {
2604        self.record_op("declare_table");
2605        if let Some(ref manifest_ns) = self.manifest_ns {
2606            let mut response = manifest_ns.declare_table(request.clone()).await?;
2607            if let Some(ref location) = response.location {
2608                // For backwards compatibility, only skip vending credentials when explicitly set to false
2609                let vend = request.vend_credentials.unwrap_or(true);
2610                let identity = request.identity.as_deref();
2611                response.storage_options = self
2612                    .get_storage_options_for_table(location, vend, identity)
2613                    .await?;
2614            }
2615            // Set managed_versioning when table_version_tracking_enabled
2616            if self.table_version_tracking_enabled {
2617                response.managed_versioning = Some(true);
2618            }
2619            return Ok(response);
2620        }
2621
2622        Self::validate_dir_only_properties(request.properties.as_ref(), "declare_table")?;
2623
2624        let table_name = Self::table_name_from_id(&request.id)?;
2625        let table_uri = self.table_full_uri(&table_name);
2626
2627        // Validate location if provided
2628        if let Some(location) = &request.location {
2629            let location = location.trim_end_matches('/');
2630            if location != table_uri {
2631                return Err(NamespaceError::InvalidInput {
2632                    message: format!(
2633                        "Cannot declare table {} at location {}, must be at location {}",
2634                        table_name, location, table_uri
2635                    ),
2636                }
2637                .into());
2638            }
2639        }
2640
2641        // Check if table already has data (created via create_table).
2642        // The atomic put only prevents races between concurrent declare_table calls,
2643        // not between declare_table and existing data.
2644        let status = self.check_table_status(&table_name).await;
2645        if status.exists && !status.has_reserved_file {
2646            // Table has data but no reserved file - it was created with data
2647            return Err(NamespaceError::TableAlreadyExists {
2648                message: table_name.to_string(),
2649            }
2650            .into());
2651        }
2652
2653        // Atomically create the .lance-reserved file to mark the table as declared.
2654        // This uses put_if_not_exists semantics to avoid race conditions between
2655        // concurrent declare_table calls.
2656        let reserved_file_path = self.table_reserved_file_path(&table_name);
2657
2658        self.put_marker_file_atomic(&reserved_file_path, &format!("table {}", table_name))
2659            .await
2660            .map_err(|e| {
2661                if e.contains("already exists") {
2662                    lance_core::Error::from(NamespaceError::TableAlreadyExists {
2663                        message: table_name.to_string(),
2664                    })
2665                } else {
2666                    lance_core::Error::from(NamespaceError::Internal { message: e })
2667                }
2668            })?;
2669
2670        // For backwards compatibility, only skip vending credentials when explicitly set to false
2671        let vend_credentials = request.vend_credentials.unwrap_or(true);
2672        let identity = request.identity.as_deref();
2673        let storage_options = self
2674            .get_storage_options_for_table(&table_uri, vend_credentials, identity)
2675            .await?;
2676
2677        Ok(DeclareTableResponse {
2678            location: Some(table_uri),
2679            storage_options,
2680            properties: request.properties,
2681            managed_versioning: if self.table_version_tracking_enabled {
2682                Some(true)
2683            } else {
2684                None
2685            },
2686            ..Default::default()
2687        })
2688    }
2689
2690    async fn register_table(
2691        &self,
2692        request: lance_namespace::models::RegisterTableRequest,
2693    ) -> Result<lance_namespace::models::RegisterTableResponse> {
2694        self.record_op("register_table");
2695        // If manifest is enabled, delegate to manifest namespace
2696        if let Some(ref manifest_ns) = self.manifest_ns {
2697            return LanceNamespace::register_table(manifest_ns.as_ref(), request).await;
2698        }
2699
2700        // Without manifest, register_table is not supported
2701        Err(NamespaceError::Unsupported {
2702            message: "register_table is only supported when manifest mode is enabled".to_string(),
2703        }
2704        .into())
2705    }
2706
2707    async fn deregister_table(
2708        &self,
2709        request: lance_namespace::models::DeregisterTableRequest,
2710    ) -> Result<lance_namespace::models::DeregisterTableResponse> {
2711        self.record_op("deregister_table");
2712        // If manifest is enabled, delegate to manifest namespace
2713        if let Some(ref manifest_ns) = self.manifest_ns {
2714            return LanceNamespace::deregister_table(manifest_ns.as_ref(), request).await;
2715        }
2716
2717        // V1 mode: create a .lance-deregistered marker file in the table directory
2718        let table_name = Self::table_name_from_id(&request.id)?;
2719        let table_uri = self.table_full_uri(&table_name);
2720
2721        // Check table existence and deregistration status.
2722        // This provides better error messages for common cases.
2723        let status = self.check_table_status(&table_name).await;
2724
2725        if !status.exists {
2726            return Err(NamespaceError::TableNotFound {
2727                message: table_name.to_string(),
2728            }
2729            .into());
2730        }
2731
2732        if status.is_deregistered {
2733            return Err(NamespaceError::TableNotFound {
2734                message: format!("Table is already deregistered: {}", table_name),
2735            }
2736            .into());
2737        }
2738
2739        // Atomically create the .lance-deregistered marker file.
2740        // This uses put_if_not_exists semantics to prevent race conditions
2741        // when multiple processes try to deregister the same table concurrently.
2742        // If a race occurs and another process already created the file,
2743        // we'll get an AlreadyExists error which we convert to a proper message.
2744        let deregistered_path = self.table_deregistered_file_path(&table_name);
2745        self.put_marker_file_atomic(
2746            &deregistered_path,
2747            &format!("deregistration marker for table {}", table_name),
2748        )
2749        .await
2750        .map_err(|e| {
2751            if e.contains("already exists") {
2752                lance_core::Error::from(NamespaceError::InvalidTableState {
2753                    message: format!("Table is already deregistered: {}", table_name),
2754                })
2755            } else {
2756                lance_core::Error::from(NamespaceError::Internal { message: e })
2757            }
2758        })?;
2759
2760        Ok(lance_namespace::models::DeregisterTableResponse {
2761            id: request.id,
2762            location: Some(table_uri),
2763            ..Default::default()
2764        })
2765    }
2766
2767    async fn list_table_versions(
2768        &self,
2769        request: ListTableVersionsRequest,
2770    ) -> Result<ListTableVersionsResponse> {
2771        self.record_op("list_table_versions");
2772        // When table_version_storage_enabled, query from __manifest
2773        if self.table_version_storage_enabled
2774            && let Some(ref manifest_ns) = self.manifest_ns
2775        {
2776            let table_id = request.id.clone().unwrap_or_default();
2777            let want_descending = request.descending == Some(true);
2778            return manifest_ns
2779                .list_table_versions(&table_id, want_descending, request.limit)
2780                .await;
2781        }
2782
2783        // Fallback when table_version_storage is not enabled: list from _versions/ directory
2784        let table_uri = self.resolve_table_location(&request.id).await?;
2785        let want_descending = request.descending == Some(true);
2786        let table_versions = self
2787            .list_table_versions_from_storage(&table_uri, want_descending, request.limit)
2788            .await?;
2789
2790        Ok(ListTableVersionsResponse {
2791            versions: table_versions,
2792            page_token: None,
2793        })
2794    }
2795
2796    async fn create_table_version(
2797        &self,
2798        request: CreateTableVersionRequest,
2799    ) -> Result<CreateTableVersionResponse> {
2800        self.record_op("create_table_version");
2801        let table_uri = self.resolve_table_location(&request.id).await?;
2802
2803        let staging_manifest_path = &request.manifest_path;
2804        let version = request.version as u64;
2805
2806        let table_path = self.object_store_path_from_uri(&table_uri)?;
2807
2808        // Determine naming scheme from request, default to V2
2809        let naming_scheme = match request.naming_scheme.as_deref() {
2810            Some("V1") => ManifestNamingScheme::V1,
2811            _ => ManifestNamingScheme::V2,
2812        };
2813
2814        // Compute final path using the naming scheme
2815        let final_path = naming_scheme.manifest_path(&table_path, version);
2816
2817        let staging_path = Path::parse(staging_manifest_path).map_err(|e| {
2818            lance_core::Error::from(NamespaceError::InvalidInput {
2819                message: format!(
2820                    "Invalid staging manifest path '{}': {}",
2821                    staging_manifest_path, e
2822                ),
2823            })
2824        })?;
2825
2826        let copy_result = match self
2827            .object_store
2828            .inner
2829            .copy_if_not_exists(&staging_path, &final_path)
2830            .await
2831        {
2832            Ok(()) => Ok(()),
2833            Err(ObjectStoreError::NotImplemented { .. })
2834            | Err(ObjectStoreError::NotSupported { .. }) => {
2835                let manifest_data = self
2836                    .object_store
2837                    .inner
2838                    .get(&staging_path)
2839                    .await
2840                    .map_err(|e| {
2841                        lance_core::Error::from(NamespaceError::Internal {
2842                            message: format!(
2843                                "Failed to read staging manifest at '{}': {}",
2844                                staging_manifest_path, e
2845                            ),
2846                        })
2847                    })?
2848                    .bytes()
2849                    .await
2850                    .map_err(|e| {
2851                        lance_core::Error::from(NamespaceError::Internal {
2852                            message: format!(
2853                                "Failed to read staging manifest bytes at '{}': {}",
2854                                staging_manifest_path, e
2855                            ),
2856                        })
2857                    })?;
2858                self.object_store
2859                    .inner
2860                    .put_opts(
2861                        &final_path,
2862                        manifest_data.into(),
2863                        PutOptions {
2864                            mode: PutMode::Create,
2865                            ..Default::default()
2866                        },
2867                    )
2868                    .await
2869                    .map(|_| ())
2870            }
2871            Err(e) => Err(e),
2872        };
2873
2874        match copy_result {
2875            Ok(()) => {}
2876            Err(ObjectStoreError::AlreadyExists { .. })
2877            | Err(ObjectStoreError::Precondition { .. }) => {
2878                return Err(lance_core::Error::from(
2879                    NamespaceError::ConcurrentModification {
2880                        message: format!(
2881                            "Version {} already exists for table at '{}'",
2882                            version, table_uri
2883                        ),
2884                    },
2885                ));
2886            }
2887            Err(e) => {
2888                return Err(lance_core::Error::from(NamespaceError::Internal {
2889                    message: format!(
2890                        "Failed to create version {} for table at '{}': {}",
2891                        version, table_uri, e
2892                    ),
2893                }));
2894            }
2895        }
2896
2897        let final_meta = self
2898            .object_store
2899            .inner
2900            .head(&final_path)
2901            .await
2902            .map_err(|e| {
2903                lance_core::Error::from(NamespaceError::Internal {
2904                    message: format!(
2905                        "Failed to stat created version {} for table at '{}': {}",
2906                        version, table_uri, e
2907                    ),
2908                })
2909            })?;
2910        let manifest_size = final_meta.size as i64;
2911
2912        // Delete the staging manifest after successful copy
2913        if let Err(e) = self.object_store.inner.delete(&staging_path).await {
2914            log::warn!(
2915                "Failed to delete staging manifest at '{}': {:?}",
2916                staging_path,
2917                e
2918            );
2919        }
2920
2921        // If table_version_storage_enabled is enabled, also record in __manifest (best-effort)
2922        if self.table_version_storage_enabled
2923            && let Some(ref manifest_ns) = self.manifest_ns
2924        {
2925            let table_id_str =
2926                manifest::ManifestNamespace::str_object_id(&request.id.clone().unwrap_or_default());
2927            let object_id =
2928                manifest::ManifestNamespace::build_version_object_id(&table_id_str, version as i64);
2929            let metadata_json = serde_json::json!({
2930                "manifest_path": final_path.to_string(),
2931                "manifest_size": manifest_size,
2932                "e_tag": final_meta.e_tag,
2933                "naming_scheme": request.naming_scheme.as_deref().unwrap_or("V2"),
2934            })
2935            .to_string();
2936
2937            if let Err(e) = manifest_ns
2938                .insert_into_manifest_with_metadata(
2939                    vec![manifest::ManifestEntry {
2940                        object_id,
2941                        object_type: manifest::ObjectType::TableVersion,
2942                        location: None,
2943                        metadata: Some(metadata_json),
2944                    }],
2945                    None,
2946                )
2947                .await
2948            {
2949                log::warn!(
2950                    "Failed to record table version in __manifest (best-effort): {:?}",
2951                    e
2952                );
2953            }
2954        }
2955
2956        Ok(CreateTableVersionResponse {
2957            transaction_id: None,
2958            version: Some(Box::new(TableVersion {
2959                version: version as i64,
2960                manifest_path: final_path.to_string(),
2961                manifest_size: Some(manifest_size),
2962                e_tag: final_meta.e_tag,
2963                timestamp_millis: None,
2964                metadata: None,
2965            })),
2966        })
2967    }
2968
2969    async fn describe_table_version(
2970        &self,
2971        request: DescribeTableVersionRequest,
2972    ) -> Result<DescribeTableVersionResponse> {
2973        self.record_op("describe_table_version");
2974        // When table_version_storage_enabled and a specific version is requested,
2975        // query from __manifest to avoid opening the entire dataset
2976        if self.table_version_storage_enabled
2977            && let (Some(manifest_ns), Some(version)) = (&self.manifest_ns, request.version)
2978        {
2979            let table_id = request.id.clone().unwrap_or_default();
2980            return manifest_ns.describe_table_version(&table_id, version).await;
2981        }
2982
2983        // Fallback when table_version_storage is not enabled: inspect physical manifests directly.
2984        let table_uri = self.resolve_table_location(&request.id).await?;
2985        let versions = self
2986            .list_table_versions_from_storage(&table_uri, true, None)
2987            .await?;
2988        let table_version = if let Some(requested_version) = request.version {
2989            versions
2990                .into_iter()
2991                .find(|version| version.version == requested_version)
2992                .ok_or_else(|| {
2993                    lance_core::Error::from(NamespaceError::TableVersionNotFound {
2994                        message: format!(
2995                            "version {} for table {}",
2996                            requested_version,
2997                            Self::format_table_id_from_request(&request.id)
2998                        ),
2999                    })
3000                })?
3001        } else {
3002            versions.into_iter().next().ok_or_else(|| {
3003                lance_core::Error::from(NamespaceError::TableVersionNotFound {
3004                    message: format!(
3005                        "latest version for table {}",
3006                        Self::format_table_id_from_request(&request.id)
3007                    ),
3008                })
3009            })?
3010        };
3011
3012        Ok(DescribeTableVersionResponse {
3013            version: Box::new(table_version),
3014        })
3015    }
3016
3017    async fn batch_delete_table_versions(
3018        &self,
3019        request: BatchDeleteTableVersionsRequest,
3020    ) -> Result<BatchDeleteTableVersionsResponse> {
3021        self.record_op("batch_delete_table_versions");
3022        // Single-table mode: use `id` (from path parameter) + `ranges` to delete
3023        // versions from one table.
3024        let ranges: Vec<(i64, i64)> = request
3025            .ranges
3026            .iter()
3027            .map(|r| {
3028                let start = r.start_version;
3029                let end = if r.end_version > 0 {
3030                    r.end_version
3031                } else {
3032                    start
3033                };
3034                (start, end)
3035            })
3036            .collect();
3037        let table_entries = vec![TableDeleteEntry {
3038            table_id: request.id.clone(),
3039            ranges,
3040        }];
3041
3042        let mut total_deleted_count = 0i64;
3043
3044        if self.table_version_storage_enabled
3045            && let Some(ref manifest_ns) = self.manifest_ns
3046        {
3047            // Phase 1 (atomic commit point): Delete version records from __manifest
3048            // for ALL tables in a single atomic operation. This is the authoritative
3049            // source of truth — once __manifest entries are removed, the versions
3050            // are logically deleted across all tables atomically.
3051
3052            // Collect all (table_id_str, ranges) for batch deletion
3053            let mut all_object_ids: Vec<String> = Vec::new();
3054            for te in &table_entries {
3055                let table_id_str = manifest::ManifestNamespace::str_object_id(
3056                    &te.table_id.clone().unwrap_or_default(),
3057                );
3058                for (start, end) in &te.ranges {
3059                    for version in *start..=*end {
3060                        let object_id = manifest::ManifestNamespace::build_version_object_id(
3061                            &table_id_str,
3062                            version,
3063                        );
3064                        all_object_ids.push(object_id);
3065                    }
3066                }
3067            }
3068
3069            if !all_object_ids.is_empty() {
3070                total_deleted_count = manifest_ns
3071                    .batch_delete_table_versions_by_object_ids(&all_object_ids)
3072                    .await?;
3073            }
3074
3075            // Phase 2: Delete physical manifest files (best-effort).
3076            // Even if some file deletions fail, the versions are already removed from
3077            // __manifest, so they won't be visible to readers. Leftover files are
3078            // orphaned but harmless and can be cleaned up later.
3079            let _ = self
3080                .delete_physical_version_files(&table_entries, true)
3081                .await;
3082
3083            return Ok(BatchDeleteTableVersionsResponse {
3084                deleted_count: Some(total_deleted_count),
3085                transaction_id: None,
3086            });
3087        }
3088
3089        // Fallback when table_version_storage is not enabled: delete physical files directly (no __manifest)
3090        total_deleted_count = self
3091            .delete_physical_version_files(&table_entries, false)
3092            .await?;
3093
3094        Ok(BatchDeleteTableVersionsResponse {
3095            deleted_count: Some(total_deleted_count),
3096            transaction_id: None,
3097        })
3098    }
3099
3100    async fn create_table_index(
3101        &self,
3102        request: CreateTableIndexRequest,
3103    ) -> Result<CreateTableIndexResponse> {
3104        self.record_op("create_table_index");
3105        let table_uri = self.resolve_table_location(&request.id).await?;
3106        let mut dataset = self
3107            .load_dataset(&table_uri, None, "create_table_index")
3108            .await?;
3109        let index_request = Self::build_index_params(&request)?;
3110
3111        dataset
3112            .create_index(
3113                &[request.column.as_str()],
3114                index_request.index_type(),
3115                request.name.clone(),
3116                index_request.params(),
3117                false,
3118            )
3119            .await
3120            .map_err(|e| {
3121                let err_msg = format!("{}", e);
3122                let ns_err = if err_msg.contains("already exists") {
3123                    NamespaceError::TableIndexAlreadyExists {
3124                        message: format!(
3125                            "Index '{}' already exists on table '{}': {:?}",
3126                            request.name.as_deref().unwrap_or("<auto-generated>"),
3127                            table_uri,
3128                            e
3129                        ),
3130                    }
3131                } else if err_msg.contains("not found") || err_msg.contains("does not exist") {
3132                    NamespaceError::TableColumnNotFound {
3133                        message: format!(
3134                            "Column '{}' not found for table '{}': {:?}",
3135                            request.column, table_uri, e
3136                        ),
3137                    }
3138                } else {
3139                    NamespaceError::Internal {
3140                        message: format!(
3141                            "Failed to create {} index '{}' on column '{}' for table '{}': {:?}",
3142                            request.index_type,
3143                            request.name.as_deref().unwrap_or("<auto-generated>"),
3144                            request.column,
3145                            table_uri,
3146                            e
3147                        ),
3148                    }
3149                };
3150                lance_core::Error::from(ns_err)
3151            })?;
3152
3153        let transaction_id = dataset
3154            .read_transaction()
3155            .await
3156            .map_err(|e| {
3157                lance_core::Error::from(NamespaceError::Internal {
3158                    message: format!(
3159                        "Failed to read committed transaction after creating index on '{}': {}",
3160                        table_uri, e
3161                    ),
3162                })
3163            })?
3164            .map(|transaction| transaction.uuid);
3165
3166        Ok(CreateTableIndexResponse { transaction_id })
3167    }
3168
3169    async fn list_table_indices(
3170        &self,
3171        request: ListTableIndicesRequest,
3172    ) -> Result<ListTableIndicesResponse> {
3173        self.record_op("list_table_indices");
3174        let table_uri = self.resolve_table_location(&request.id).await?;
3175        let dataset = self
3176            .load_dataset(&table_uri, request.version, "list_table_indices")
3177            .await?;
3178        let mut indices = dataset
3179            .describe_indices(None)
3180            .await
3181            .map_err(|e| {
3182                lance_core::Error::from(NamespaceError::Internal {
3183                    message: format!("Failed to describe table indices for '{}': {:?}", table_uri, e),
3184                })
3185            })?
3186            .into_iter()
3187            .filter(|description| {
3188                description
3189                    .metadata()
3190                    .first()
3191                    .map(|metadata| !is_system_index(metadata))
3192                    .unwrap_or(false)
3193            })
3194            .map(|description| {
3195                let columns = description
3196                    .field_ids()
3197                    .iter()
3198                        .map(|field_id| {
3199                        dataset
3200                            .schema()
3201                            .field_path(i32::try_from(*field_id).map_err(|e| {
3202                                lance_core::Error::from(NamespaceError::Internal {
3203                                    message: format!(
3204                                        "Field id {} does not fit in i32 for table '{}': {}",
3205                                        field_id, table_uri, e
3206                                    ),
3207                                })
3208                            })?)
3209                            .map_err(|e| {
3210                            lance_core::Error::from(NamespaceError::Internal {
3211                                message: format!(
3212                                    "Failed to resolve field path for field_id {} in table '{}': {}",
3213                                    field_id, table_uri, e
3214                                ),
3215                            })
3216                        })
3217                    })
3218                    .collect::<Result<Vec<_>>>()?;
3219
3220                Ok(IndexContent {
3221                    index_name: description.name().to_string(),
3222                    index_uuid: description.metadata()[0].uuid.to_string(),
3223                    columns,
3224                    status: "SUCCEEDED".to_string(),
3225                })
3226            })
3227            .collect::<Result<Vec<_>>>()?;
3228
3229        let page_token = Self::paginate_indices(&mut indices, request.page_token, request.limit);
3230        Ok(ListTableIndicesResponse {
3231            indexes: indices,
3232            page_token,
3233        })
3234    }
3235
3236    async fn describe_table_index_stats(
3237        &self,
3238        request: DescribeTableIndexStatsRequest,
3239    ) -> Result<DescribeTableIndexStatsResponse> {
3240        self.record_op("describe_table_index_stats");
3241        let table_uri = self.resolve_table_location(&request.id).await?;
3242        let dataset = self
3243            .load_dataset(&table_uri, request.version, "describe_table_index_stats")
3244            .await?;
3245        let index_name = request.index_name.as_deref().ok_or_else(|| {
3246            lance_core::Error::from(NamespaceError::InvalidInput {
3247                message: "Index name is required for describe_table_index_stats".to_string(),
3248            })
3249        })?;
3250        let metadatas = dataset
3251            .load_indices_by_name(index_name)
3252            .await
3253            .map_err(|e| {
3254                lance_core::Error::from(NamespaceError::TableIndexNotFound {
3255                    message: format!(
3256                        "Failed to load index '{}' metadata for table '{}': {}",
3257                        index_name, table_uri, e
3258                    ),
3259                })
3260            })?;
3261        if metadatas.first().is_some_and(is_system_index) {
3262            return Err(NamespaceError::Unsupported {
3263                message: format!("System index '{}' is not exposed by this API", index_name),
3264            }
3265            .into());
3266        }
3267
3268        let stats = <Dataset as DatasetIndexExt>::index_statistics(&dataset, index_name)
3269            .await
3270            .map_err(|e| {
3271                lance_core::Error::from(NamespaceError::TableIndexNotFound {
3272                    message: format!(
3273                        "Failed to describe index statistics for '{}' on table '{}': {}",
3274                        index_name, table_uri, e
3275                    ),
3276                })
3277            })?;
3278        let stats: serde_json::Value = serde_json::from_str(&stats).map_err(|e| {
3279            lance_core::Error::from(NamespaceError::Internal {
3280                message: format!(
3281                    "Failed to parse index statistics for '{}' on table '{}': {}",
3282                    index_name, table_uri, e
3283                ),
3284            })
3285        })?;
3286
3287        Ok(Self::describe_table_index_stats_response(&stats))
3288    }
3289
3290    async fn describe_transaction(
3291        &self,
3292        request: DescribeTransactionRequest,
3293    ) -> Result<DescribeTransactionResponse> {
3294        self.record_op("describe_transaction");
3295        let mut request_id = request.id.ok_or_else(|| {
3296            lance_core::Error::from(NamespaceError::InvalidInput {
3297                message: "Transaction id must include table id and transaction identifier"
3298                    .to_string(),
3299            })
3300        })?;
3301        if request_id.len() < 2 {
3302            return Err(NamespaceError::InvalidInput {
3303                message: format!(
3304                    "Transaction request id must include table id and transaction identifier, got {:?}",
3305                    request_id
3306                ),
3307            }
3308            .into());
3309        }
3310
3311        let id = request_id.pop().expect("request_id len checked above");
3312        let table_id = Some(request_id);
3313        let table_uri = self.resolve_table_location(&table_id).await?;
3314        let dataset = self
3315            .load_dataset(&table_uri, None, "describe_transaction")
3316            .await?;
3317        let (version, transaction) = self.find_transaction(&dataset, &id).await?;
3318
3319        Ok(Self::transaction_response(version, &transaction))
3320    }
3321
3322    async fn create_table_scalar_index(
3323        &self,
3324        request: CreateTableIndexRequest,
3325    ) -> Result<CreateTableScalarIndexResponse> {
3326        self.record_op("create_table_scalar_index");
3327        let index_type = Self::parse_index_type(&request.index_type)?;
3328        if !index_type.is_scalar() {
3329            return Err(NamespaceError::InvalidInput {
3330                message: format!(
3331                    "create_table_scalar_index only supports scalar index types, got {}",
3332                    request.index_type
3333                ),
3334            }
3335            .into());
3336        }
3337
3338        let response = self.create_table_index(request).await?;
3339        Ok(CreateTableScalarIndexResponse {
3340            transaction_id: response.transaction_id,
3341        })
3342    }
3343
3344    async fn drop_table_index(
3345        &self,
3346        request: DropTableIndexRequest,
3347    ) -> Result<DropTableIndexResponse> {
3348        self.record_op("drop_table_index");
3349        let table_uri = self.resolve_table_location(&request.id).await?;
3350        let index_name = request.index_name.as_deref().ok_or_else(|| {
3351            lance_core::Error::from(NamespaceError::InvalidInput {
3352                message: "Index name is required for drop_table_index".to_string(),
3353            })
3354        })?;
3355        let mut dataset = self
3356            .load_dataset(&table_uri, None, "drop_table_index")
3357            .await?;
3358        let metadatas = dataset
3359            .load_indices_by_name(index_name)
3360            .await
3361            .map_err(|e| {
3362                lance_core::Error::from(NamespaceError::TableIndexNotFound {
3363                    message: format!(
3364                        "Failed to load index '{}' before dropping it from table '{}': {}",
3365                        index_name, table_uri, e
3366                    ),
3367                })
3368            })?;
3369        if metadatas.first().is_some_and(is_system_index) {
3370            return Err(NamespaceError::Unsupported {
3371                message: format!(
3372                    "System index '{}' cannot be dropped via this API",
3373                    index_name
3374                ),
3375            }
3376            .into());
3377        }
3378
3379        dataset.drop_index(index_name).await.map_err(|e| {
3380            lance_core::Error::from(NamespaceError::TableIndexNotFound {
3381                message: format!(
3382                    "Failed to drop index '{}' from table '{}': {}",
3383                    index_name, table_uri, e
3384                ),
3385            })
3386        })?;
3387
3388        let transaction_id = dataset
3389            .read_transaction()
3390            .await
3391            .map_err(|e| {
3392                lance_core::Error::from(NamespaceError::Internal {
3393                    message: format!(
3394                        "Failed to read committed transaction after dropping index '{}' from '{}': {}",
3395                        index_name, table_uri, e
3396                    ),
3397                })
3398            })?
3399            .map(|transaction| transaction.uuid);
3400
3401        Ok(DropTableIndexResponse { transaction_id })
3402    }
3403
3404    async fn list_all_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
3405        // In dir-only mode there are no child namespaces, so all tables live in the
3406        // root directory. This is equivalent to listing the root namespace.
3407        let mut tables = self.list_directory_tables().await?;
3408        tables = self
3409            .filter_declared_tables(tables, request.include_declared.unwrap_or(true))
3410            .await?;
3411        Self::apply_pagination(&mut tables, request.page_token, request.limit);
3412        Ok(ListTablesResponse::new(tables))
3413    }
3414
3415    async fn restore_table(&self, request: RestoreTableRequest) -> Result<RestoreTableResponse> {
3416        let version = request.version;
3417        if version < 0 {
3418            return Err(Error::invalid_input_source(
3419                format!(
3420                    "Table version for restore_table must be non-negative, got {}",
3421                    version
3422                )
3423                .into(),
3424            ));
3425        }
3426
3427        let table_uri = self.resolve_table_location(&request.id).await?;
3428        let mut dataset = self.load_dataset(&table_uri, None, "restore_table").await?;
3429
3430        dataset = dataset
3431            .checkout_version(version as u64)
3432            .await
3433            .map_err(|e| {
3434                Error::namespace_source(
3435                    format!(
3436                        "Failed to checkout version {} for restore at '{}': {}",
3437                        version, table_uri, e
3438                    )
3439                    .into(),
3440                )
3441            })?;
3442
3443        dataset.restore().await.map_err(|e| {
3444            Error::namespace_source(
3445                format!(
3446                    "Failed to restore table at '{}' to version {}: {}",
3447                    table_uri, version, e
3448                )
3449                .into(),
3450            )
3451        })?;
3452
3453        let transaction_id = dataset
3454            .read_transaction()
3455            .await
3456            .map_err(|e| {
3457                Error::namespace_source(
3458                    format!(
3459                        "Failed to read transaction after restoring '{}': {}",
3460                        table_uri, e
3461                    )
3462                    .into(),
3463                )
3464            })?
3465            .map(|t| t.uuid);
3466
3467        Ok(RestoreTableResponse { transaction_id })
3468    }
3469
3470    async fn update_table_schema_metadata(
3471        &self,
3472        request: UpdateTableSchemaMetadataRequest,
3473    ) -> Result<UpdateTableSchemaMetadataResponse> {
3474        let table_uri = self.resolve_table_location(&request.id).await?;
3475        let mut dataset = self
3476            .load_dataset(&table_uri, None, "update_table_schema_metadata")
3477            .await?;
3478
3479        let new_metadata = request.metadata.unwrap_or_default();
3480        let updated_metadata = dataset
3481            .update_schema_metadata(new_metadata.iter().map(|(k, v)| (k.as_str(), v.as_str())))
3482            .await
3483            .map_err(|e| {
3484                Error::namespace_source(
3485                    format!(
3486                        "Failed to update schema metadata for table at '{}': {}",
3487                        table_uri, e
3488                    )
3489                    .into(),
3490                )
3491            })?;
3492
3493        let transaction_id = dataset
3494            .read_transaction()
3495            .await
3496            .map_err(|e| {
3497                Error::namespace_source(
3498                    format!(
3499                        "Failed to read transaction after updating metadata for '{}': {}",
3500                        table_uri, e
3501                    )
3502                    .into(),
3503                )
3504            })?
3505            .map(|t| t.uuid);
3506
3507        Ok(UpdateTableSchemaMetadataResponse {
3508            metadata: Some(updated_metadata),
3509            transaction_id,
3510        })
3511    }
3512
3513    async fn get_table_stats(
3514        &self,
3515        request: GetTableStatsRequest,
3516    ) -> Result<GetTableStatsResponse> {
3517        let table_uri = self.resolve_table_location(&request.id).await?;
3518        let dataset = Arc::new(
3519            self.load_dataset(&table_uri, None, "get_table_stats")
3520                .await?,
3521        );
3522
3523        // Compute total bytes on disk using field-level statistics
3524        let data_stats = dataset.calculate_data_stats().await.map_err(|e| {
3525            Error::namespace_source(
3526                format!(
3527                    "Failed to calculate data statistics for table at '{}': {}",
3528                    table_uri, e
3529                )
3530                .into(),
3531            )
3532        })?;
3533        let total_bytes: i64 = data_stats
3534            .fields
3535            .iter()
3536            .map(|f| f.bytes_on_disk as i64)
3537            .sum();
3538
3539        // Collect per-fragment row counts
3540        let fragment_row_futures: Vec<_> = dataset
3541            .get_fragments()
3542            .into_iter()
3543            .map(|f| async move { f.physical_rows().await })
3544            .collect();
3545        let fragment_row_results = futures::future::join_all(fragment_row_futures).await;
3546        let mut fragment_row_counts: Vec<i64> = fragment_row_results
3547            .into_iter()
3548            .filter_map(|r| r.ok())
3549            .map(|r| r as i64)
3550            .collect();
3551
3552        let num_fragments = fragment_row_counts.len() as i64;
3553        let num_rows: i64 = fragment_row_counts.iter().sum();
3554
3555        // Fragments with fewer rows than the compaction target are considered "small",
3556        // consistent with CompactionOptions::target_rows_per_fragment default.
3557        const SMALL_FRAGMENT_THRESHOLD: i64 = 1024 * 1024;
3558        let num_small_fragments = fragment_row_counts
3559            .iter()
3560            .filter(|&&r| r < SMALL_FRAGMENT_THRESHOLD)
3561            .count() as i64;
3562
3563        // Compute length summary statistics
3564        fragment_row_counts.sort_unstable();
3565        let lengths = if fragment_row_counts.is_empty() {
3566            FragmentSummary::new(0, 0, 0, 0, 0, 0, 0)
3567        } else {
3568            let len = fragment_row_counts.len();
3569            let min = fragment_row_counts[0];
3570            let max = fragment_row_counts[len - 1];
3571            let mean = num_rows / num_fragments;
3572            let pct = |p: f64| fragment_row_counts[((len - 1) as f64 * p) as usize];
3573            FragmentSummary::new(min, max, mean, pct(0.25), pct(0.50), pct(0.75), pct(0.99))
3574        };
3575
3576        // Count non-system indices
3577        let indices = dataset.load_indices().await.map_err(|e| {
3578            Error::namespace_source(
3579                format!("Failed to load indices for table at '{}': {}", table_uri, e).into(),
3580            )
3581        })?;
3582        let num_indices = indices.iter().filter(|m| !is_system_index(m)).count() as i64;
3583
3584        let fragment_stats = FragmentStats::new(num_fragments, num_small_fragments, lengths);
3585        Ok(GetTableStatsResponse::new(
3586            total_bytes,
3587            num_rows,
3588            num_indices,
3589            fragment_stats,
3590        ))
3591    }
3592
3593    async fn explain_table_query_plan(
3594        &self,
3595        request: ExplainTableQueryPlanRequest,
3596    ) -> Result<String> {
3597        let table_uri = self.resolve_table_location(&request.id).await?;
3598        let dataset = self
3599            .load_dataset(
3600                &table_uri,
3601                request.query.version,
3602                "explain_table_query_plan",
3603            )
3604            .await?;
3605        let verbose = request.verbose.unwrap_or(false);
3606
3607        let mut scanner = dataset.scan();
3608        Self::apply_query_params_to_scanner(
3609            &mut scanner,
3610            request.query.filter.as_deref(),
3611            request.query.columns.as_deref(),
3612            request.query.vector_column.as_deref(),
3613            &request.query.vector,
3614            request.query.k,
3615            request.query.offset,
3616            request.query.prefilter,
3617            request.query.bypass_vector_index,
3618            request.query.nprobes,
3619            request.query.ef,
3620            request.query.refine_factor,
3621            request.query.distance_type.as_deref(),
3622            request.query.fast_search,
3623            request.query.with_row_id,
3624            request.query.lower_bound,
3625            request.query.upper_bound,
3626            "explain_table_query_plan",
3627        )?;
3628
3629        scanner.explain_plan(verbose).await.map_err(|e| {
3630            Error::namespace_source(
3631                format!(
3632                    "Failed to explain query plan for table at '{}': {}",
3633                    table_uri, e
3634                )
3635                .into(),
3636            )
3637        })
3638    }
3639
3640    async fn analyze_table_query_plan(
3641        &self,
3642        request: AnalyzeTableQueryPlanRequest,
3643    ) -> Result<String> {
3644        let table_uri = self.resolve_table_location(&request.id).await?;
3645        let dataset = self
3646            .load_dataset(&table_uri, request.version, "analyze_table_query_plan")
3647            .await?;
3648
3649        let mut scanner = dataset.scan();
3650        Self::apply_query_params_to_scanner(
3651            &mut scanner,
3652            request.filter.as_deref(),
3653            request.columns.as_deref(),
3654            request.vector_column.as_deref(),
3655            &request.vector,
3656            request.k,
3657            request.offset,
3658            request.prefilter,
3659            request.bypass_vector_index,
3660            request.nprobes,
3661            request.ef,
3662            request.refine_factor,
3663            request.distance_type.as_deref(),
3664            request.fast_search,
3665            request.with_row_id,
3666            request.lower_bound,
3667            request.upper_bound,
3668            "analyze_table_query_plan",
3669        )?;
3670
3671        scanner.analyze_plan().await.map_err(|e| {
3672            Error::namespace_source(
3673                format!(
3674                    "Failed to analyze query plan for table at '{}': {}",
3675                    table_uri, e
3676                )
3677                .into(),
3678            )
3679        })
3680    }
3681
3682    async fn count_table_rows(&self, request: CountTableRowsRequest) -> Result<i64> {
3683        self.record_op("count_table_rows");
3684        let table_uri = self.resolve_table_location(&request.id).await?;
3685        let dataset = self
3686            .load_dataset(&table_uri, request.version, "count_table_rows")
3687            .await?;
3688
3689        let count =
3690            dataset
3691                .count_rows(request.predicate)
3692                .await
3693                .map_err(|e| NamespaceError::Internal {
3694                    message: format!("Failed to count rows for table at '{}': {:?}", table_uri, e),
3695                })?;
3696
3697        Ok(count as i64)
3698    }
3699
3700    async fn insert_into_table(
3701        &self,
3702        request: InsertIntoTableRequest,
3703        request_data: Bytes,
3704    ) -> Result<InsertIntoTableResponse> {
3705        self.record_op("insert_into_table");
3706        let table_uri = self.resolve_table_location(&request.id).await?;
3707        let (reader, _num_rows) =
3708            Self::ipc_reader_from_request_data(&request_data, "insert_into_table")?;
3709
3710        let mode = match request.mode.as_deref() {
3711            Some(m) if m.eq_ignore_ascii_case("overwrite") => WriteMode::Overwrite,
3712            Some(m) if m.eq_ignore_ascii_case("append") => WriteMode::Append,
3713            None => WriteMode::Append,
3714            Some(m) => {
3715                return Err(lance_namespace::error::NamespaceError::InvalidInput {
3716                    message: format!(
3717                        "Unsupported write mode '{}'. Supported modes are: 'append', 'overwrite'",
3718                        m
3719                    ),
3720                }
3721                .into());
3722            }
3723        };
3724
3725        if !self.table_uri_has_actual_manifests(&table_uri).await? {
3726            self.write_reader_to_table(&table_uri, reader, WriteMode::Create, None)
3727                .await?;
3728        } else {
3729            self.write_reader_to_table(&table_uri, reader, mode, None)
3730                .await?;
3731        }
3732
3733        Ok(InsertIntoTableResponse {
3734            transaction_id: None,
3735        })
3736    }
3737
3738    async fn merge_insert_into_table(
3739        &self,
3740        request: MergeInsertIntoTableRequest,
3741        request_data: Bytes,
3742    ) -> Result<MergeInsertIntoTableResponse> {
3743        self.record_op("merge_insert_into_table");
3744        let table_uri = self.resolve_table_location(&request.id).await?;
3745        let on = request.on.as_ref().ok_or_else(|| {
3746            lance_core::Error::from(NamespaceError::InvalidInput {
3747                message: "'on' field is required for merge_insert_into_table".to_string(),
3748            })
3749        })?;
3750
3751        let table_has_manifests = self.table_uri_has_actual_manifests(&table_uri).await?;
3752        let (reader, num_rows) =
3753            Self::ipc_reader_from_request_data(&request_data, "merge_insert_into_table")?;
3754
3755        if !table_has_manifests {
3756            let dataset = self
3757                .write_reader_to_table(&table_uri, reader, WriteMode::Create, None)
3758                .await?;
3759            let version = dataset.version().version as i64;
3760            return Ok(MergeInsertIntoTableResponse {
3761                transaction_id: None,
3762                num_updated_rows: Some(0),
3763                num_inserted_rows: Some(num_rows as i64),
3764                num_deleted_rows: Some(0),
3765                version: Some(version),
3766            });
3767        }
3768
3769        let dataset = Arc::new(
3770            self.load_dataset(&table_uri, None, "merge_insert_into_table")
3771                .await?,
3772        );
3773
3774        let mut merge_builder = MergeInsertBuilder::try_new(dataset.clone(), vec![on.clone()])
3775            .map_err(|e| {
3776                lance_core::Error::from(NamespaceError::InvalidInput {
3777                    message: format!("Failed to create merge_insert_into_table builder: {}", e),
3778                })
3779            })?;
3780
3781        if let Some(filter) = request.when_matched_update_all_filt.as_deref() {
3782            let behavior = WhenMatched::update_if(dataset.as_ref(), filter).map_err(|e| {
3783                lance_core::Error::from(NamespaceError::InvalidInput {
3784                    message: format!(
3785                        "Invalid when_matched_update_all_filt for merge_insert_into_table: {}",
3786                        e
3787                    ),
3788                })
3789            })?;
3790            merge_builder.when_matched(behavior);
3791        } else if request.when_matched_update_all.unwrap_or(false) {
3792            merge_builder.when_matched(WhenMatched::UpdateAll);
3793        }
3794
3795        if matches!(request.when_not_matched_insert_all, Some(false)) {
3796            merge_builder.when_not_matched(WhenNotMatched::DoNothing);
3797        } else {
3798            merge_builder.when_not_matched(WhenNotMatched::InsertAll);
3799        }
3800
3801        if let Some(filter) = request.when_not_matched_by_source_delete_filt.as_deref() {
3802            let behavior = WhenNotMatchedBySource::delete_if(dataset.as_ref(), filter).map_err(|e| {
3803                lance_core::Error::from(NamespaceError::InvalidInput {
3804                    message: format!(
3805                        "Invalid when_not_matched_by_source_delete_filt for merge_insert_into_table: {}",
3806                        e
3807                    ),
3808                })
3809            })?;
3810            merge_builder.when_not_matched_by_source(behavior);
3811        } else if request.when_not_matched_by_source_delete.unwrap_or(false) {
3812            merge_builder.when_not_matched_by_source(WhenNotMatchedBySource::Delete);
3813        }
3814
3815        if let Some(use_index) = request.use_index {
3816            merge_builder.use_index(use_index);
3817        }
3818
3819        let (dataset, stats) = merge_builder
3820            .try_build()
3821            .map_err(|e| {
3822                lance_core::Error::from(NamespaceError::InvalidInput {
3823                    message: format!("Failed to build merge_insert_into_table job: {}", e),
3824                })
3825            })?
3826            .execute_reader(reader)
3827            .await
3828            .map_err(|e| NamespaceError::Internal {
3829                message: format!(
3830                    "Failed to merge_insert_into_table at '{}': {}",
3831                    table_uri, e
3832                ),
3833            })?;
3834
3835        Ok(MergeInsertIntoTableResponse {
3836            transaction_id: None,
3837            num_updated_rows: Some(stats.num_updated_rows as i64),
3838            num_inserted_rows: Some(stats.num_inserted_rows as i64),
3839            num_deleted_rows: Some(stats.num_deleted_rows as i64),
3840            version: Some(dataset.version().version as i64),
3841        })
3842    }
3843
3844    async fn query_table(&self, request: QueryTableRequest) -> Result<Bytes> {
3845        use arrow::ipc::writer::FileWriter;
3846
3847        self.record_op("query_table");
3848        let table_uri = self.resolve_table_location(&request.id).await?;
3849        let dataset = self
3850            .load_dataset(&table_uri, request.version, "query_table")
3851            .await?;
3852
3853        // Build scanner
3854        let mut scanner = dataset.scan();
3855
3856        // Check if this is a vector search query
3857        // vector is Box<QueryTableRequestVector>, not Option
3858        let has_vector_query = request
3859            .vector
3860            .single_vector
3861            .as_ref()
3862            .map(|sv| !sv.is_empty())
3863            .unwrap_or(false)
3864            || request
3865                .vector
3866                .multi_vector
3867                .as_ref()
3868                .map(|mv| !mv.is_empty())
3869                .unwrap_or(false);
3870
3871        // Apply prefilter setting (must be set before nearest)
3872        if let Some(prefilter) = request.prefilter {
3873            scanner.prefilter(prefilter);
3874        }
3875
3876        // Apply vector search if query vector is provided
3877        if has_vector_query {
3878            let vector_column = request.vector_column.as_deref().unwrap_or("vector");
3879
3880            // Get the query vector(s)
3881            let query_vector: Vec<f32> = request
3882                .vector
3883                .single_vector
3884                .clone()
3885                .or_else(|| {
3886                    request
3887                        .vector
3888                        .multi_vector
3889                        .as_ref()
3890                        .and_then(|mv| mv.first().cloned())
3891                })
3892                .unwrap_or_default();
3893
3894            if !query_vector.is_empty() {
3895                let k = if request.k > 0 {
3896                    request.k as usize
3897                } else {
3898                    10
3899                };
3900                let query_array = Float32Array::from(query_vector);
3901                scanner
3902                    .nearest(vector_column, &query_array, k)
3903                    .map_err(|e| NamespaceError::InvalidInput {
3904                        message: format!("Invalid vector search: {:?}", e),
3905                    })?;
3906
3907                // Apply distance type if specified
3908                if let Some(ref distance_type) = request.distance_type {
3909                    let metric = match distance_type.to_lowercase().as_str() {
3910                        "l2" | "euclidean" => MetricType::L2,
3911                        "cosine" => MetricType::Cosine,
3912                        "dot" | "inner_product" => MetricType::Dot,
3913                        "hamming" => MetricType::Hamming,
3914                        _ => {
3915                            return Err(NamespaceError::InvalidInput {
3916                                message: format!("Unknown distance type: {}", distance_type),
3917                            }
3918                            .into());
3919                        }
3920                    };
3921                    scanner.distance_metric(metric);
3922                }
3923
3924                // Apply nprobes if specified (maps to minimum_nprobes, matching lancedb behavior)
3925                if let Some(nprobes) = request.nprobes {
3926                    scanner.minimum_nprobes(nprobes as usize);
3927                }
3928
3929                // Apply ef (HNSW search effort) if specified
3930                if let Some(ef) = request.ef {
3931                    scanner.ef(ef as usize);
3932                }
3933
3934                // Apply refine_factor if specified
3935                if let Some(refine_factor) = request.refine_factor {
3936                    scanner.refine(refine_factor as u32);
3937                }
3938
3939                // Apply distance bounds if specified
3940                if request.lower_bound.is_some() || request.upper_bound.is_some() {
3941                    scanner.distance_range(request.lower_bound, request.upper_bound);
3942                }
3943
3944                // Apply use_index (inverse of bypass_vector_index)
3945                if let Some(bypass) = request.bypass_vector_index {
3946                    scanner.use_index(!bypass);
3947                }
3948
3949                // Apply fast_search if specified
3950                if request.fast_search == Some(true) {
3951                    scanner.fast_search();
3952                }
3953            }
3954        }
3955
3956        // Apply full text search if specified
3957        if let Some(ref fts_query) = request.full_text_query {
3958            // Handle string_query (simple string FTS)
3959            if let Some(ref string_query) = fts_query.string_query {
3960                let mut fts = FullTextSearchQuery::new(string_query.query.clone());
3961
3962                // Apply column filter if specified
3963                if let Some(ref columns) = string_query.columns
3964                    && !columns.is_empty()
3965                {
3966                    fts = fts
3967                        .with_columns(columns)
3968                        .map_err(|e| NamespaceError::InvalidInput {
3969                            message: format!("Invalid FTS columns: {:?}", e),
3970                        })?;
3971                }
3972
3973                scanner
3974                    .full_text_search(fts)
3975                    .map_err(|e| NamespaceError::InvalidInput {
3976                        message: format!("Invalid full text search: {:?}", e),
3977                    })?;
3978            }
3979            // Note: structured_query would require more complex parsing
3980            // For now, we only support string_query
3981        }
3982
3983        // Apply column projection if specified
3984        if let Some(ref columns) = request.columns {
3985            if let Some(ref column_names) = columns.column_names
3986                && !column_names.is_empty()
3987            {
3988                scanner
3989                    .project(column_names)
3990                    .map_err(|e| NamespaceError::InvalidInput {
3991                        message: format!("Invalid column projection: {:?}", e),
3992                    })?;
3993            } else if let Some(ref column_aliases) = columns.column_aliases
3994                && !column_aliases.is_empty()
3995            {
3996                // column_aliases is HashMap<String, String> where key is alias, value is SQL expression
3997                let transform_pairs: Vec<(String, String)> = column_aliases
3998                    .iter()
3999                    .map(|(alias, sql)| (alias.clone(), sql.clone()))
4000                    .collect();
4001                scanner
4002                    .project_with_transform(
4003                        &transform_pairs
4004                            .iter()
4005                            .map(|(a, s)| (a.as_str(), s.as_str()))
4006                            .collect::<Vec<_>>(),
4007                    )
4008                    .map_err(|e| NamespaceError::InvalidInput {
4009                        message: format!("Invalid column alias expression: {:?}", e),
4010                    })?;
4011            }
4012        }
4013
4014        // Apply filter if specified
4015        if let Some(ref filter) = request.filter
4016            && !filter.is_empty()
4017        {
4018            scanner
4019                .filter(filter)
4020                .map_err(|e| NamespaceError::InvalidInput {
4021                    message: format!("Invalid filter expression: {:?}", e),
4022                })?;
4023        }
4024
4025        // Apply with_row_id if requested
4026        if request.with_row_id == Some(true) {
4027            scanner.with_row_id();
4028        }
4029
4030        // Apply limit if specified (k is the number of results to return)
4031        // k == 0 means no limit
4032        // Note: For vector search, limit is already applied via nearest()
4033        if !has_vector_query && request.k > 0 {
4034            let offset = request.offset.map(|o| o as i64);
4035            scanner.limit(Some(request.k as i64), offset).map_err(|e| {
4036                NamespaceError::InvalidInput {
4037                    message: format!("Invalid limit/offset: {:?}", e),
4038                }
4039            })?;
4040        } else if has_vector_query && request.offset.is_some() {
4041            // For vector search, offset is handled separately
4042            let offset = request.offset.map(|o| o as i64);
4043            scanner
4044                .limit(None, offset)
4045                .map_err(|e| NamespaceError::InvalidInput {
4046                    message: format!("Invalid offset: {:?}", e),
4047                })?;
4048        }
4049
4050        // Execute the scan and collect results
4051        let batch = scanner
4052            .try_into_batch()
4053            .await
4054            .map_err(|e| NamespaceError::Internal {
4055                message: format!("Failed to execute query: {:?}", e),
4056            })?;
4057
4058        // Serialize to Arrow IPC file format
4059        let schema = batch.schema();
4060        let mut buffer = Vec::new();
4061        {
4062            let mut writer = FileWriter::try_new(&mut buffer, &schema).map_err(|e| {
4063                NamespaceError::Internal {
4064                    message: format!("Failed to create IPC writer: {:?}", e),
4065                }
4066            })?;
4067            writer.write(&batch).map_err(|e| NamespaceError::Internal {
4068                message: format!("Failed to write batch to IPC: {:?}", e),
4069            })?;
4070            writer.finish().map_err(|e| NamespaceError::Internal {
4071                message: format!("Failed to finish IPC writer: {:?}", e),
4072            })?;
4073        }
4074
4075        Ok(Bytes::from(buffer))
4076    }
4077
4078    fn namespace_id(&self) -> String {
4079        format!("DirectoryNamespace {{ root: {:?} }}", self.root)
4080    }
4081}
4082
4083#[cfg(test)]
4084mod tests {
4085    use super::*;
4086    use arrow_ipc::reader::{FileReader, StreamReader};
4087    use lance::dataset::Dataset;
4088    use lance::index::DatasetIndexExt;
4089    use lance_core::utils::tempfile::{TempStdDir, TempStrDir};
4090    use lance_core::utils::testing::CountingObjectStore;
4091    use lance_io::object_store::{providers::local::FileStoreProvider, uri_to_url};
4092    use lance_namespace::models::{
4093        CreateTableRequest, JsonArrowDataType, JsonArrowField, JsonArrowSchema, ListTablesRequest,
4094        QueryTableRequestColumns,
4095    };
4096    use lance_namespace::schema::convert_json_arrow_schema;
4097    use std::io::Cursor;
4098    use std::sync::{
4099        Arc,
4100        atomic::{AtomicUsize, Ordering},
4101    };
4102    use url::Url;
4103
4104    fn assert_plan_contains_all(plan: &str, expected_fragments: &[&str], context: &str) {
4105        for expected_fragment in expected_fragments {
4106            assert!(
4107                plan.contains(expected_fragment),
4108                "{}. Missing fragment: '{}'. Plan:\n{}",
4109                context,
4110                expected_fragment,
4111                plan
4112            );
4113        }
4114    }
4115
4116    /// Helper to create a test DirectoryNamespace with a temporary directory
4117    async fn create_test_namespace() -> (DirectoryNamespace, TempStdDir) {
4118        let temp_dir = TempStdDir::default();
4119
4120        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
4121            .build()
4122            .await
4123            .unwrap();
4124        (namespace, temp_dir)
4125    }
4126
    /// Test-only object store provider that carries a shared counter.
    ///
    /// NOTE(review): judging by the field name, the counter presumably tracks
    /// listing calls made against stores created by this provider — confirm
    /// against `CountingObjectStore`.
    #[derive(Debug)]
    struct CountingFileStoreProvider {
        /// Counter handed to every `CountingObjectStore` this provider creates.
        listing_count: Arc<AtomicUsize>,
    }
4131
4132    #[async_trait]
4133    impl lance_io::object_store::ObjectStoreProvider for CountingFileStoreProvider {
4134        async fn new_store(
4135            &self,
4136            base_path: Url,
4137            params: &ObjectStoreParams,
4138        ) -> Result<ObjectStore> {
4139            let provider = FileStoreProvider;
4140            let mut store = provider.new_store(base_path, params).await?;
4141            store.inner = Arc::new(CountingObjectStore::new(
4142                store.inner.clone(),
4143                self.listing_count.clone(),
4144            ));
4145            Ok(store)
4146        }
4147
4148        fn extract_path(&self, url: &Url) -> Result<Path> {
4149            let provider = FileStoreProvider;
4150            provider.extract_path(url)
4151        }
4152
4153        fn calculate_object_store_prefix(
4154            &self,
4155            url: &Url,
4156            storage_options: Option<&HashMap<String, String>>,
4157        ) -> Result<String> {
4158            let provider = FileStoreProvider;
4159            provider.calculate_object_store_prefix(url, storage_options)
4160        }
4161    }
4162
4163    fn file_object_store_uri(path: &str) -> String {
4164        let file_url = uri_to_url(path).unwrap();
4165        let mut url = Url::parse("file-object-store:///").unwrap();
4166        url.set_path(file_url.path());
4167        url.to_string()
4168    }
4169
4170    fn build_listing_counting_session(listing_count: Arc<AtomicUsize>) -> Arc<Session> {
4171        let registry = Arc::new(ObjectStoreRegistry::default());
4172        registry.insert(
4173            "file-object-store",
4174            Arc::new(CountingFileStoreProvider { listing_count }),
4175        );
4176        Arc::new(Session::new(0, 0, registry))
4177    }
4178
4179    /// Helper to create test IPC data from a schema
4180    fn create_test_ipc_data(schema: &JsonArrowSchema) -> Vec<u8> {
4181        use arrow::ipc::writer::StreamWriter;
4182
4183        let arrow_schema = convert_json_arrow_schema(schema).unwrap();
4184        let arrow_schema = Arc::new(arrow_schema);
4185        let batch = arrow::record_batch::RecordBatch::new_empty(arrow_schema.clone());
4186        let mut buffer = Vec::new();
4187        {
4188            let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
4189            writer.write(&batch).unwrap();
4190            writer.finish().unwrap();
4191        }
4192        buffer
4193    }
4194
4195    fn create_ipc_data_from_batches(
4196        schema: Arc<arrow_schema::Schema>,
4197        batches: Vec<arrow::record_batch::RecordBatch>,
4198    ) -> Vec<u8> {
4199        use arrow::ipc::writer::StreamWriter;
4200
4201        let mut buffer = Vec::new();
4202        {
4203            let mut writer = StreamWriter::try_new(&mut buffer, &schema).unwrap();
4204            for batch in &batches {
4205                writer.write(batch).unwrap();
4206            }
4207            writer.finish().unwrap();
4208        }
4209        buffer
4210    }
4211
4212    fn create_non_empty_test_ipc_data() -> Vec<u8> {
4213        use arrow::array::{Int32Array, StringArray};
4214        use arrow::record_batch::RecordBatch;
4215
4216        let schema = Arc::new(convert_json_arrow_schema(&create_test_schema()).unwrap());
4217        let batch = RecordBatch::try_new(
4218            schema.clone(),
4219            vec![
4220                Arc::new(Int32Array::from(vec![1, 2])),
4221                Arc::new(StringArray::from(vec![Some("alice"), Some("bob")])),
4222            ],
4223        )
4224        .unwrap();
4225        create_ipc_data_from_batches(schema, vec![batch])
4226    }
4227
4228    fn create_single_row_test_ipc_data() -> Vec<u8> {
4229        use arrow::array::{Int32Array, StringArray};
4230        use arrow::record_batch::RecordBatch;
4231
4232        let schema = Arc::new(convert_json_arrow_schema(&create_test_schema()).unwrap());
4233        let batch = RecordBatch::try_new(
4234            schema.clone(),
4235            vec![
4236                Arc::new(Int32Array::from(vec![10])),
4237                Arc::new(StringArray::from(vec![Some("carol")])),
4238            ],
4239        )
4240        .unwrap();
4241        create_ipc_data_from_batches(schema, vec![batch])
4242    }
4243
4244    /// Helper to create a simple test schema
4245    fn create_test_schema() -> JsonArrowSchema {
4246        let int_type = JsonArrowDataType::new("int32".to_string());
4247        let string_type = JsonArrowDataType::new("utf8".to_string());
4248
4249        let id_field = JsonArrowField {
4250            name: "id".to_string(),
4251            r#type: Box::new(int_type),
4252            nullable: false,
4253            metadata: None,
4254        };
4255
4256        let name_field = JsonArrowField {
4257            name: "name".to_string(),
4258            r#type: Box::new(string_type),
4259            nullable: true,
4260            metadata: None,
4261        };
4262
4263        JsonArrowSchema {
4264            fields: vec![id_field, name_field],
4265            metadata: None,
4266        }
4267    }
4268
4269    fn create_scalar_table_ipc_data() -> Vec<u8> {
4270        use arrow::array::{Int32Array, StringArray};
4271        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
4272
4273        let schema = Arc::new(ArrowSchema::new(vec![
4274            Field::new("id", DataType::Int32, false),
4275            Field::new("name", DataType::Utf8, true),
4276        ]));
4277        let batch = arrow::record_batch::RecordBatch::try_new(
4278            schema.clone(),
4279            vec![
4280                Arc::new(Int32Array::from(vec![1, 2, 3])),
4281                Arc::new(StringArray::from(vec!["alice", "bob", "cory"])),
4282            ],
4283        )
4284        .unwrap();
4285        create_ipc_data_from_batches(schema, vec![batch])
4286    }
4287
4288    fn create_vector_table_ipc_data() -> Vec<u8> {
4289        use arrow::array::{FixedSizeListArray, Float32Array, Int32Array};
4290        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
4291
4292        let schema = Arc::new(ArrowSchema::new(vec![
4293            Field::new("id", DataType::Int32, false),
4294            Field::new(
4295                "vector",
4296                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 2),
4297                true,
4298            ),
4299        ]));
4300        let vector_field = Arc::new(Field::new("item", DataType::Float32, true));
4301        let vectors = FixedSizeListArray::try_new(
4302            vector_field,
4303            2,
4304            Arc::new(Float32Array::from(vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6])),
4305            None,
4306        )
4307        .unwrap();
4308        let batch = arrow::record_batch::RecordBatch::try_new(
4309            schema.clone(),
4310            vec![Arc::new(Int32Array::from(vec![1, 2, 3])), Arc::new(vectors)],
4311        )
4312        .unwrap();
4313        create_ipc_data_from_batches(schema, vec![batch])
4314    }
4315
4316    async fn create_scalar_table(namespace: &DirectoryNamespace, table_name: &str) {
4317        let mut create_table_request = CreateTableRequest::new();
4318        create_table_request.id = Some(vec![table_name.to_string()]);
4319        namespace
4320            .create_table(
4321                create_table_request,
4322                Bytes::from(create_scalar_table_ipc_data()),
4323            )
4324            .await
4325            .unwrap();
4326    }
4327
4328    async fn create_vector_table(namespace: &DirectoryNamespace, table_name: &str) {
4329        let mut create_table_request = CreateTableRequest::new();
4330        create_table_request.id = Some(vec![table_name.to_string()]);
4331        namespace
4332            .create_table(
4333                create_table_request,
4334                Bytes::from(create_vector_table_ipc_data()),
4335            )
4336            .await
4337            .unwrap();
4338    }
4339
4340    async fn open_dataset(namespace: &DirectoryNamespace, table_name: &str) -> Dataset {
4341        let mut describe_request = DescribeTableRequest::new();
4342        describe_request.id = Some(vec![table_name.to_string()]);
4343        let table_uri = namespace
4344            .describe_table(describe_request)
4345            .await
4346            .unwrap()
4347            .location
4348            .expect("table location should exist");
4349        Dataset::open(&table_uri).await.unwrap()
4350    }
4351
4352    async fn create_scalar_index(
4353        namespace: &DirectoryNamespace,
4354        table_name: &str,
4355        index_name: &str,
4356    ) -> Option<String> {
4357        use lance_namespace::models::CreateTableIndexRequest;
4358
4359        let mut create_index_request =
4360            CreateTableIndexRequest::new("id".to_string(), "BTREE".to_string());
4361        create_index_request.id = Some(vec![table_name.to_string()]);
4362        create_index_request.name = Some(index_name.to_string());
4363        namespace
4364            .create_table_scalar_index(create_index_request)
4365            .await
4366            .unwrap()
4367            .transaction_id
4368    }
4369
4370    #[tokio::test]
4371    async fn test_create_table() {
4372        let (namespace, _temp_dir) = create_test_namespace().await;
4373
4374        // Create test IPC data
4375        let schema = create_test_schema();
4376        let ipc_data = create_test_ipc_data(&schema);
4377
4378        let mut request = CreateTableRequest::new();
4379        request.id = Some(vec!["test_table".to_string()]);
4380
4381        let response = namespace
4382            .create_table(request, bytes::Bytes::from(ipc_data))
4383            .await
4384            .unwrap();
4385
4386        assert!(response.location.is_some());
4387        assert!(response.location.unwrap().ends_with("test_table.lance"));
4388        assert_eq!(response.version, Some(1));
4389    }
4390
4391    #[tokio::test]
4392    async fn test_create_table_without_data() {
4393        let (namespace, _temp_dir) = create_test_namespace().await;
4394
4395        let mut request = CreateTableRequest::new();
4396        request.id = Some(vec!["test_table".to_string()]);
4397
4398        let result = namespace.create_table(request, bytes::Bytes::new()).await;
4399        assert!(result.is_err());
4400        assert!(
4401            result
4402                .unwrap_err()
4403                .to_string()
4404                .contains("Arrow IPC stream) is required")
4405        );
4406    }
4407
4408    #[tokio::test]
4409    async fn test_create_table_with_invalid_id() {
4410        let (namespace, _temp_dir) = create_test_namespace().await;
4411
4412        // Create test IPC data
4413        let schema = create_test_schema();
4414        let ipc_data = create_test_ipc_data(&schema);
4415
4416        // Test with empty ID
4417        let mut request = CreateTableRequest::new();
4418        request.id = Some(vec![]);
4419
4420        let result = namespace
4421            .create_table(request, bytes::Bytes::from(ipc_data.clone()))
4422            .await;
4423        assert!(result.is_err());
4424
4425        // Test with multi-level ID - should now work with manifest enabled
4426        // First create the parent namespace
4427        let mut create_ns_req = CreateNamespaceRequest::new();
4428        create_ns_req.id = Some(vec!["test_namespace".to_string()]);
4429        namespace.create_namespace(create_ns_req).await.unwrap();
4430
4431        // Now create table in the namespace
4432        let mut request = CreateTableRequest::new();
4433        request.id = Some(vec!["test_namespace".to_string(), "table".to_string()]);
4434
4435        let result = namespace
4436            .create_table(request, bytes::Bytes::from(ipc_data))
4437            .await;
4438        // Should succeed with manifest enabled
4439        assert!(
4440            result.is_ok(),
4441            "Multi-level table IDs should work with manifest enabled"
4442        );
4443    }
4444
4445    #[tokio::test]
4446    async fn test_list_tables() {
4447        let (namespace, _temp_dir) = create_test_namespace().await;
4448
4449        // Initially, no tables
4450        let mut request = ListTablesRequest::new();
4451        request.id = Some(vec![]);
4452        let response = namespace.list_tables(request).await.unwrap();
4453        assert_eq!(response.tables.len(), 0);
4454
4455        // Create test IPC data
4456        let schema = create_test_schema();
4457        let ipc_data = create_test_ipc_data(&schema);
4458
4459        // Create a table
4460        let mut create_request = CreateTableRequest::new();
4461        create_request.id = Some(vec!["table1".to_string()]);
4462        namespace
4463            .create_table(create_request, bytes::Bytes::from(ipc_data.clone()))
4464            .await
4465            .unwrap();
4466
4467        // Create another table
4468        let mut create_request = CreateTableRequest::new();
4469        create_request.id = Some(vec!["table2".to_string()]);
4470        namespace
4471            .create_table(create_request, bytes::Bytes::from(ipc_data))
4472            .await
4473            .unwrap();
4474
4475        // List tables should return both
4476        let mut request = ListTablesRequest::new();
4477        request.id = Some(vec![]);
4478        let response = namespace.list_tables(request).await.unwrap();
4479        let tables = response.tables;
4480        assert_eq!(tables.len(), 2);
4481        assert!(tables.contains(&"table1".to_string()));
4482        assert!(tables.contains(&"table2".to_string()));
4483    }
4484
4485    #[tokio::test]
4486    async fn test_list_tables_pagination() {
4487        let (namespace, _temp_dir) = create_test_namespace().await;
4488
4489        let schema = create_test_schema();
4490        let ipc_data = create_test_ipc_data(&schema);
4491
4492        for name in ["alpha", "bravo", "charlie"] {
4493            let mut req = CreateTableRequest::new();
4494            req.id = Some(vec![name.to_string()]);
4495            namespace
4496                .create_table(req, bytes::Bytes::from(ipc_data.clone()))
4497                .await
4498                .unwrap();
4499        }
4500
4501        // First page: limit=2, no page_token
4502        let first_page = namespace
4503            .list_tables(ListTablesRequest {
4504                id: Some(vec![]),
4505                limit: Some(2),
4506                ..Default::default()
4507            })
4508            .await
4509            .unwrap();
4510
4511        assert_eq!(first_page.tables, vec!["alpha", "bravo"]);
4512        assert_eq!(first_page.page_token.as_deref(), Some("bravo"));
4513
4514        // Second page: use page_token from first response
4515        let second_page = namespace
4516            .list_tables(ListTablesRequest {
4517                id: Some(vec![]),
4518                limit: Some(2),
4519                page_token: first_page.page_token.clone(),
4520                ..Default::default()
4521            })
4522            .await
4523            .unwrap();
4524
4525        assert_eq!(second_page.tables, vec!["charlie"]);
4526        assert!(second_page.page_token.is_none());
4527    }
4528
4529    #[tokio::test]
4530    async fn test_list_tables_pagination_limit_zero() {
4531        let (namespace, _temp_dir) = create_test_namespace().await;
4532
4533        let schema = create_test_schema();
4534        let ipc_data = create_test_ipc_data(&schema);
4535
4536        let mut req = CreateTableRequest::new();
4537        req.id = Some(vec!["alpha".to_string()]);
4538        namespace
4539            .create_table(req, bytes::Bytes::from(ipc_data))
4540            .await
4541            .unwrap();
4542
4543        let response = namespace
4544            .list_tables(ListTablesRequest {
4545                id: Some(vec![]),
4546                limit: Some(0),
4547                ..Default::default()
4548            })
4549            .await
4550            .unwrap();
4551
4552        assert!(response.tables.is_empty());
4553        assert!(response.page_token.is_none());
4554    }
4555
4556    #[tokio::test]
4557    async fn test_list_tables_with_namespace_id() {
4558        let (namespace, _temp_dir) = create_test_namespace().await;
4559
4560        // First create a child namespace
4561        let mut create_ns_req = CreateNamespaceRequest::new();
4562        create_ns_req.id = Some(vec!["test_namespace".to_string()]);
4563        namespace.create_namespace(create_ns_req).await.unwrap();
4564
4565        // Now list tables in the child namespace
4566        let mut request = ListTablesRequest::new();
4567        request.id = Some(vec!["test_namespace".to_string()]);
4568
4569        let result = namespace.list_tables(request).await;
4570        // Should succeed (with manifest enabled) and return empty list (no tables yet)
4571        assert!(
4572            result.is_ok(),
4573            "list_tables should work with child namespace when manifest is enabled"
4574        );
4575        let response = result.unwrap();
4576        assert_eq!(
4577            response.tables.len(),
4578            0,
4579            "Namespace should have no tables yet"
4580        );
4581    }
4582
4583    #[tokio::test]
4584    async fn test_create_scalar_index() {
4585        let (namespace, _temp_dir) = create_test_namespace().await;
4586        create_scalar_table(&namespace, "users").await;
4587
4588        let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4589        let dataset = open_dataset(&namespace, "users").await;
4590        let expected_transaction_id = dataset
4591            .read_transaction()
4592            .await
4593            .unwrap()
4594            .map(|transaction| transaction.uuid);
4595        assert_eq!(transaction_id, expected_transaction_id);
4596        let indices = dataset.load_indices().await.unwrap();
4597        assert!(indices.iter().any(|index| index.name == "users_id_idx"));
4598    }
4599
4600    #[tokio::test]
4601    async fn test_create_vector_index() {
4602        use lance_namespace::models::CreateTableIndexRequest;
4603
4604        let (namespace, _temp_dir) = create_test_namespace().await;
4605        create_vector_table(&namespace, "vectors").await;
4606
4607        let mut create_index_request =
4608            CreateTableIndexRequest::new("vector".to_string(), "IVF_FLAT".to_string());
4609        create_index_request.id = Some(vec!["vectors".to_string()]);
4610        create_index_request.name = Some("vector_idx".to_string());
4611        create_index_request.distance_type = Some("l2".to_string());
4612        let transaction_id = namespace
4613            .create_table_index(create_index_request)
4614            .await
4615            .unwrap()
4616            .transaction_id;
4617
4618        let dataset = open_dataset(&namespace, "vectors").await;
4619        let expected_transaction_id = dataset
4620            .read_transaction()
4621            .await
4622            .unwrap()
4623            .map(|transaction| transaction.uuid);
4624        assert_eq!(transaction_id, expected_transaction_id);
4625        let indices = dataset.load_indices().await.unwrap();
4626        assert!(indices.iter().any(|index| index.name == "vector_idx"));
4627    }
4628
4629    #[tokio::test]
4630    async fn test_list_table_indices() {
4631        use lance_namespace::models::ListTableIndicesRequest;
4632
4633        let (namespace, _temp_dir) = create_test_namespace().await;
4634        create_scalar_table(&namespace, "users").await;
4635        create_scalar_index(&namespace, "users", "a_idx").await;
4636        create_scalar_index(&namespace, "users", "b_idx").await;
4637        let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4638
4639        let response = namespace
4640            .list_table_indices(ListTableIndicesRequest {
4641                id: Some(vec!["users".to_string()]),
4642                ..Default::default()
4643            })
4644            .await
4645            .unwrap();
4646
4647        assert_eq!(response.indexes.len(), 3);
4648        assert_eq!(response.indexes[0].index_name, "a_idx");
4649        assert_eq!(response.indexes[1].index_name, "b_idx");
4650        assert_eq!(response.indexes[2].index_name, "users_id_idx");
4651        assert!(response.page_token.is_none());
4652        let users_id_idx = response
4653            .indexes
4654            .iter()
4655            .find(|index| index.index_name == "users_id_idx")
4656            .unwrap();
4657        assert_eq!(users_id_idx.columns, vec!["id"]);
4658        assert_eq!(users_id_idx.status, "SUCCEEDED");
4659
4660        let dataset = open_dataset(&namespace, "users").await;
4661        let expected_transaction_id = dataset
4662            .read_transaction()
4663            .await
4664            .unwrap()
4665            .map(|transaction| transaction.uuid);
4666        assert_eq!(transaction_id, expected_transaction_id);
4667        let indices = dataset.load_indices().await.unwrap();
4668        assert_eq!(
4669            indices
4670                .iter()
4671                .filter(|index| index.name == "users_id_idx")
4672                .count(),
4673            1
4674        );
4675
4676        let first_page = namespace
4677            .list_table_indices(ListTableIndicesRequest {
4678                id: Some(vec!["users".to_string()]),
4679                limit: Some(2),
4680                ..Default::default()
4681            })
4682            .await
4683            .unwrap();
4684
4685        assert_eq!(first_page.indexes.len(), 2);
4686        assert_eq!(first_page.indexes[0].index_name, "a_idx");
4687        assert_eq!(first_page.indexes[1].index_name, "b_idx");
4688        assert_eq!(first_page.page_token.as_deref(), Some("b_idx"));
4689
4690        let second_page = namespace
4691            .list_table_indices(ListTableIndicesRequest {
4692                id: Some(vec!["users".to_string()]),
4693                page_token: first_page.page_token.clone(),
4694                limit: Some(2),
4695                ..Default::default()
4696            })
4697            .await
4698            .unwrap();
4699
4700        assert_eq!(second_page.indexes.len(), 1);
4701        assert_eq!(second_page.indexes[0].index_name, "users_id_idx");
4702        assert!(second_page.page_token.is_none());
4703    }
4704
4705    #[tokio::test]
4706    async fn test_describe_table_index_stats() {
4707        use lance_namespace::models::DescribeTableIndexStatsRequest;
4708
4709        let (namespace, _temp_dir) = create_test_namespace().await;
4710        create_scalar_table(&namespace, "users").await;
4711        let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4712
4713        let response = namespace
4714            .describe_table_index_stats(DescribeTableIndexStatsRequest {
4715                id: Some(vec!["users".to_string()]),
4716                index_name: Some("users_id_idx".to_string()),
4717                ..Default::default()
4718            })
4719            .await
4720            .unwrap();
4721        assert_eq!(response.index_type, Some("BTree".to_string()));
4722        assert_eq!(response.num_indices, Some(1));
4723        assert_eq!(response.num_indexed_rows, Some(3));
4724        assert_eq!(response.num_unindexed_rows, Some(0));
4725
4726        let dataset = open_dataset(&namespace, "users").await;
4727        let expected_transaction_id = dataset
4728            .read_transaction()
4729            .await
4730            .unwrap()
4731            .map(|transaction| transaction.uuid);
4732        assert_eq!(transaction_id, expected_transaction_id);
4733        let stats: serde_json::Value =
4734            serde_json::from_str(&dataset.index_statistics("users_id_idx").await.unwrap()).unwrap();
4735        assert_eq!(stats["index_type"], "BTree");
4736        assert_eq!(stats["num_indices"], 1);
4737        assert_eq!(stats["num_indexed_rows"], 3);
4738        assert_eq!(stats["num_unindexed_rows"], 0);
4739    }
4740
4741    #[tokio::test]
4742    async fn test_describe_transaction() {
4743        use lance_namespace::models::DescribeTransactionRequest;
4744
4745        let (namespace, _temp_dir) = create_test_namespace().await;
4746        create_scalar_table(&namespace, "users").await;
4747        let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4748        let dataset = open_dataset(&namespace, "users").await;
4749        let latest_transaction = dataset.read_transaction().await.unwrap();
4750        assert_eq!(
4751            transaction_id,
4752            latest_transaction
4753                .as_ref()
4754                .map(|transaction| transaction.uuid.clone())
4755        );
4756
4757        if let Some(transaction_id) = transaction_id {
4758            let response = namespace
4759                .describe_transaction(DescribeTransactionRequest {
4760                    id: Some(vec!["users".to_string(), transaction_id.clone()]),
4761                    ..Default::default()
4762                })
4763                .await
4764                .unwrap();
4765            assert_eq!(response.status, "SUCCEEDED");
4766            assert_eq!(
4767                response
4768                    .properties
4769                    .as_ref()
4770                    .and_then(|props| props.get("operation")),
4771                Some(&"CreateIndex".to_string())
4772            );
4773            assert_eq!(
4774                response
4775                    .properties
4776                    .as_ref()
4777                    .and_then(|props| props.get("uuid")),
4778                Some(&transaction_id)
4779            );
4780        } else {
4781            assert!(latest_transaction.is_none());
4782        }
4783    }
4784
4785    #[tokio::test]
4786    async fn test_drop_table_index() {
4787        use lance_namespace::models::{DropTableIndexRequest, ListTableIndicesRequest};
4788
4789        let (namespace, _temp_dir) = create_test_namespace().await;
4790        create_scalar_table(&namespace, "users").await;
4791        let create_transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4792
4793        let drop_transaction_id = namespace
4794            .drop_table_index(DropTableIndexRequest {
4795                id: Some(vec!["users".to_string()]),
4796                index_name: Some("users_id_idx".to_string()),
4797                ..Default::default()
4798            })
4799            .await
4800            .unwrap()
4801            .transaction_id;
4802
4803        let dataset = open_dataset(&namespace, "users").await;
4804        let previous_dataset = dataset
4805            .checkout_version(dataset.version().version - 1)
4806            .await
4807            .unwrap();
4808        let previous_transaction_id = previous_dataset
4809            .read_transaction()
4810            .await
4811            .unwrap()
4812            .map(|transaction| transaction.uuid);
4813        assert_eq!(create_transaction_id, previous_transaction_id);
4814        let expected_drop_transaction_id = dataset
4815            .read_transaction()
4816            .await
4817            .unwrap()
4818            .map(|transaction| transaction.uuid);
4819        assert_eq!(drop_transaction_id, expected_drop_transaction_id);
4820        let indices = dataset.load_indices().await.unwrap();
4821        assert!(!indices.iter().any(|index| index.name == "users_id_idx"));
4822
4823        let list_response = namespace
4824            .list_table_indices(ListTableIndicesRequest {
4825                id: Some(vec!["users".to_string()]),
4826                ..Default::default()
4827            })
4828            .await
4829            .unwrap();
4830        assert!(list_response.indexes.is_empty());
4831    }
4832
4833    #[tokio::test]
4834    async fn test_describe_table() {
4835        let (namespace, _temp_dir) = create_test_namespace().await;
4836
4837        // Create a table first
4838        let schema = create_test_schema();
4839        let ipc_data = create_test_ipc_data(&schema);
4840
4841        let mut create_request = CreateTableRequest::new();
4842        create_request.id = Some(vec!["test_table".to_string()]);
4843        namespace
4844            .create_table(create_request, bytes::Bytes::from(ipc_data))
4845            .await
4846            .unwrap();
4847
4848        // Describe the table
4849        let mut request = DescribeTableRequest::new();
4850        request.id = Some(vec!["test_table".to_string()]);
4851        let response = namespace.describe_table(request).await.unwrap();
4852
4853        assert!(response.location.is_some());
4854        assert!(response.location.unwrap().ends_with("test_table.lance"));
4855    }
4856
4857    #[tokio::test]
4858    async fn test_describe_nonexistent_table() {
4859        let (namespace, _temp_dir) = create_test_namespace().await;
4860
4861        let mut request = DescribeTableRequest::new();
4862        request.id = Some(vec!["nonexistent".to_string()]);
4863
4864        let result = namespace.describe_table(request).await;
4865        assert!(result.is_err());
4866        assert!(result.unwrap_err().to_string().contains("Table not found"));
4867    }
4868
4869    #[tokio::test]
4870    async fn test_table_exists() {
4871        let (namespace, _temp_dir) = create_test_namespace().await;
4872
4873        // Create a table
4874        let schema = create_test_schema();
4875        let ipc_data = create_test_ipc_data(&schema);
4876
4877        let mut create_request = CreateTableRequest::new();
4878        create_request.id = Some(vec!["existing_table".to_string()]);
4879        namespace
4880            .create_table(create_request, bytes::Bytes::from(ipc_data))
4881            .await
4882            .unwrap();
4883
4884        // Check existing table
4885        let mut request = TableExistsRequest::new();
4886        request.id = Some(vec!["existing_table".to_string()]);
4887        let result = namespace.table_exists(request).await;
4888        assert!(result.is_ok());
4889
4890        // Check non-existent table
4891        let mut request = TableExistsRequest::new();
4892        request.id = Some(vec!["nonexistent".to_string()]);
4893        let result = namespace.table_exists(request).await;
4894        assert!(result.is_err());
4895        assert!(result.unwrap_err().to_string().contains("Table not found"));
4896    }
4897
4898    #[tokio::test]
4899    async fn test_drop_table() {
4900        let (namespace, _temp_dir) = create_test_namespace().await;
4901
4902        // Create a table
4903        let schema = create_test_schema();
4904        let ipc_data = create_test_ipc_data(&schema);
4905
4906        let mut create_request = CreateTableRequest::new();
4907        create_request.id = Some(vec!["table_to_drop".to_string()]);
4908        namespace
4909            .create_table(create_request, bytes::Bytes::from(ipc_data))
4910            .await
4911            .unwrap();
4912
4913        // Verify it exists
4914        let mut exists_request = TableExistsRequest::new();
4915        exists_request.id = Some(vec!["table_to_drop".to_string()]);
4916        assert!(namespace.table_exists(exists_request.clone()).await.is_ok());
4917
4918        // Drop the table
4919        let mut drop_request = DropTableRequest::new();
4920        drop_request.id = Some(vec!["table_to_drop".to_string()]);
4921        let response = namespace.drop_table(drop_request).await.unwrap();
4922        assert!(response.location.is_some());
4923
4924        // Verify it no longer exists
4925        assert!(namespace.table_exists(exists_request).await.is_err());
4926    }
4927
4928    #[tokio::test]
4929    async fn test_drop_nonexistent_table() {
4930        let (namespace, _temp_dir) = create_test_namespace().await;
4931
4932        let mut request = DropTableRequest::new();
4933        request.id = Some(vec!["nonexistent".to_string()]);
4934
4935        // Should not fail when dropping non-existent table (idempotent)
4936        let result = namespace.drop_table(request).await;
4937        // The operation might succeed or fail depending on implementation
4938        // But it should not panic
4939        let _ = result;
4940    }
4941
4942    #[tokio::test]
4943    async fn test_root_namespace_operations() {
4944        let (namespace, _temp_dir) = create_test_namespace().await;
4945
4946        // Test list_namespaces - should return empty list for root
4947        let mut request = ListNamespacesRequest::new();
4948        request.id = Some(vec![]);
4949        let result = namespace.list_namespaces(request).await;
4950        assert!(result.is_ok());
4951        assert_eq!(result.unwrap().namespaces.len(), 0);
4952
4953        // Test describe_namespace - should succeed for root
4954        let mut request = DescribeNamespaceRequest::new();
4955        request.id = Some(vec![]);
4956        let result = namespace.describe_namespace(request).await;
4957        assert!(result.is_ok());
4958
4959        // Test namespace_exists - root always exists
4960        let mut request = NamespaceExistsRequest::new();
4961        request.id = Some(vec![]);
4962        let result = namespace.namespace_exists(request).await;
4963        assert!(result.is_ok());
4964
4965        // Test create_namespace - root cannot be created
4966        let mut request = CreateNamespaceRequest::new();
4967        request.id = Some(vec![]);
4968        let result = namespace.create_namespace(request).await;
4969        assert!(result.is_err());
4970        assert!(result.unwrap_err().to_string().contains("already exists"));
4971
4972        // Test drop_namespace - root cannot be dropped
4973        let mut request = DropNamespaceRequest::new();
4974        request.id = Some(vec![]);
4975        let result = namespace.drop_namespace(request).await;
4976        assert!(result.is_err());
4977        assert!(
4978            result
4979                .unwrap_err()
4980                .to_string()
4981                .contains("cannot be dropped")
4982        );
4983    }
4984
4985    #[tokio::test]
4986    async fn test_non_root_namespace_operations() {
4987        let (namespace, _temp_dir) = create_test_namespace().await;
4988
4989        // With manifest enabled (default), child namespaces are now supported
4990        // Test create_namespace for non-root - should succeed with manifest
4991        let mut request = CreateNamespaceRequest::new();
4992        request.id = Some(vec!["child".to_string()]);
4993        let result = namespace.create_namespace(request).await;
4994        assert!(
4995            result.is_ok(),
4996            "Child namespace creation should succeed with manifest enabled"
4997        );
4998
4999        // Test namespace_exists for non-root - should exist after creation
5000        let mut request = NamespaceExistsRequest::new();
5001        request.id = Some(vec!["child".to_string()]);
5002        let result = namespace.namespace_exists(request).await;
5003        assert!(
5004            result.is_ok(),
5005            "Child namespace should exist after creation"
5006        );
5007
5008        // Test drop_namespace for non-root - should succeed
5009        let mut request = DropNamespaceRequest::new();
5010        request.id = Some(vec!["child".to_string()]);
5011        let result = namespace.drop_namespace(request).await;
5012        assert!(
5013            result.is_ok(),
5014            "Child namespace drop should succeed with manifest enabled"
5015        );
5016
5017        // Verify namespace no longer exists
5018        let mut request = NamespaceExistsRequest::new();
5019        request.id = Some(vec!["child".to_string()]);
5020        let result = namespace.namespace_exists(request).await;
5021        assert!(
5022            result.is_err(),
5023            "Child namespace should not exist after drop"
5024        );
5025    }
5026
5027    #[tokio::test]
5028    async fn test_config_custom_root() {
5029        let temp_dir = TempStdDir::default();
5030        let custom_path = temp_dir.join("custom");
5031        std::fs::create_dir(&custom_path).unwrap();
5032
5033        let namespace = DirectoryNamespaceBuilder::new(custom_path.to_string_lossy().to_string())
5034            .build()
5035            .await
5036            .unwrap();
5037
5038        // Create test IPC data
5039        let schema = create_test_schema();
5040        let ipc_data = create_test_ipc_data(&schema);
5041
5042        // Create a table and verify location
5043        let mut request = CreateTableRequest::new();
5044        request.id = Some(vec!["test_table".to_string()]);
5045
5046        let response = namespace
5047            .create_table(request, bytes::Bytes::from(ipc_data))
5048            .await
5049            .unwrap();
5050
5051        assert!(response.location.unwrap().contains("custom"));
5052    }
5053
5054    #[tokio::test]
5055    async fn test_config_storage_options() {
5056        let temp_dir = TempStdDir::default();
5057
5058        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5059            .storage_option("option1", "value1")
5060            .storage_option("option2", "value2")
5061            .build()
5062            .await
5063            .unwrap();
5064
5065        // Create test IPC data
5066        let schema = create_test_schema();
5067        let ipc_data = create_test_ipc_data(&schema);
5068
5069        // Create a table and check storage options are included
5070        let mut request = CreateTableRequest::new();
5071        request.id = Some(vec!["test_table".to_string()]);
5072
5073        let response = namespace
5074            .create_table(request, bytes::Bytes::from(ipc_data))
5075            .await
5076            .unwrap();
5077
5078        let storage_options = response.storage_options.unwrap();
5079        assert_eq!(storage_options.get("option1"), Some(&"value1".to_string()));
5080        assert_eq!(storage_options.get("option2"), Some(&"value2".to_string()));
5081    }
5082
5083    /// When no credential vendor is configured, `describe_table` and
5084    /// `declare_table` must strip credential keys from storage options
5085    /// while preserving non-credential config (region, endpoint, etc.).
5086    #[tokio::test]
5087    async fn test_no_storage_options_without_vendor() {
5088        use lance_namespace::models::DeclareTableRequest;
5089
5090        let temp_dir = TempStdDir::default();
5091
5092        // No manifest, no credential vendor, but storage options with credentials
5093        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5094            .manifest_enabled(false)
5095            .storage_option("aws_access_key_id", "AKID")
5096            .storage_option("aws_secret_access_key", "SECRET")
5097            .storage_option("region", "us-east-1")
5098            .build()
5099            .await
5100            .unwrap();
5101
5102        let schema = create_test_schema();
5103        let ipc_data = create_test_ipc_data(&schema);
5104
5105        // create_table
5106        let mut create_req = CreateTableRequest::new();
5107        create_req.id = Some(vec!["t1".to_string()]);
5108        namespace
5109            .create_table(create_req, bytes::Bytes::from(ipc_data))
5110            .await
5111            .unwrap();
5112
5113        // describe_table should not return storage options without a vendor
5114        let mut desc_req = DescribeTableRequest::new();
5115        desc_req.id = Some(vec!["t1".to_string()]);
5116        let resp = namespace.describe_table(desc_req).await.unwrap();
5117        assert!(resp.storage_options.is_none());
5118
5119        // declare_table should not return storage options without a vendor
5120        let mut decl_req = DeclareTableRequest::new();
5121        decl_req.id = Some(vec!["t2".to_string()]);
5122        let resp = namespace.declare_table(decl_req).await.unwrap();
5123        assert!(resp.storage_options.is_none());
5124    }
5125
5126    /// Same test with manifest mode enabled.
5127    #[tokio::test]
5128    async fn test_no_storage_options_without_vendor_manifest() {
5129        let temp_dir = TempStdDir::default();
5130
5131        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5132            .storage_option("aws_access_key_id", "AKID")
5133            .storage_option("aws_secret_access_key", "SECRET")
5134            .storage_option("region", "us-east-1")
5135            .build()
5136            .await
5137            .unwrap();
5138
5139        let schema = create_test_schema();
5140        let ipc_data = create_test_ipc_data(&schema);
5141
5142        let mut create_req = CreateTableRequest::new();
5143        create_req.id = Some(vec!["t1".to_string()]);
5144        namespace
5145            .create_table(create_req, bytes::Bytes::from(ipc_data))
5146            .await
5147            .unwrap();
5148
5149        // describe_table through manifest should not return storage options without a vendor
5150        let mut desc_req = DescribeTableRequest::new();
5151        desc_req.id = Some(vec!["t1".to_string()]);
5152        let resp = namespace.describe_table(desc_req).await.unwrap();
5153        assert!(resp.storage_options.is_none());
5154    }
5155
5156    #[tokio::test]
5157    async fn test_from_properties_manifest_enabled() {
5158        let temp_dir = TempStdDir::default();
5159
5160        let mut properties = HashMap::new();
5161        properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5162        properties.insert("manifest_enabled".to_string(), "true".to_string());
5163        properties.insert("dir_listing_enabled".to_string(), "false".to_string());
5164
5165        let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5166        assert!(builder.manifest_enabled);
5167        assert!(!builder.dir_listing_enabled);
5168
5169        let namespace = builder.build().await.unwrap();
5170
5171        // Create test IPC data
5172        let schema = create_test_schema();
5173        let ipc_data = create_test_ipc_data(&schema);
5174
5175        // Create a table
5176        let mut request = CreateTableRequest::new();
5177        request.id = Some(vec!["test_table".to_string()]);
5178
5179        let response = namespace
5180            .create_table(request, bytes::Bytes::from(ipc_data))
5181            .await
5182            .unwrap();
5183
5184        assert!(response.location.is_some());
5185    }
5186
5187    #[tokio::test]
5188    async fn test_from_properties_dir_listing_enabled() {
5189        let temp_dir = TempStdDir::default();
5190
5191        let mut properties = HashMap::new();
5192        properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5193        properties.insert("manifest_enabled".to_string(), "false".to_string());
5194        properties.insert("dir_listing_enabled".to_string(), "true".to_string());
5195
5196        let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5197        assert!(!builder.manifest_enabled);
5198        assert!(builder.dir_listing_enabled);
5199
5200        let namespace = builder.build().await.unwrap();
5201
5202        // Create test IPC data
5203        let schema = create_test_schema();
5204        let ipc_data = create_test_ipc_data(&schema);
5205
5206        // Create a table
5207        let mut request = CreateTableRequest::new();
5208        request.id = Some(vec!["test_table".to_string()]);
5209
5210        let response = namespace
5211            .create_table(request, bytes::Bytes::from(ipc_data))
5212            .await
5213            .unwrap();
5214
5215        assert!(response.location.is_some());
5216    }
5217
5218    #[tokio::test]
5219    async fn test_from_properties_defaults() {
5220        let temp_dir = TempStdDir::default();
5221
5222        let mut properties = HashMap::new();
5223        properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5224
5225        let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5226        // Both should default to true
5227        assert!(builder.manifest_enabled);
5228        assert!(builder.dir_listing_enabled);
5229    }
5230
5231    #[tokio::test]
5232    async fn test_from_properties_with_storage_options() {
5233        let temp_dir = TempStdDir::default();
5234
5235        let mut properties = HashMap::new();
5236        properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5237        properties.insert("manifest_enabled".to_string(), "true".to_string());
5238        properties.insert("storage.region".to_string(), "us-west-2".to_string());
5239        properties.insert("storage.bucket".to_string(), "my-bucket".to_string());
5240
5241        let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5242        assert!(builder.manifest_enabled);
5243        assert!(builder.storage_options.is_some());
5244
5245        let storage_options = builder.storage_options.unwrap();
5246        assert_eq!(
5247            storage_options.get("region"),
5248            Some(&"us-west-2".to_string())
5249        );
5250        assert_eq!(
5251            storage_options.get("bucket"),
5252            Some(&"my-bucket".to_string())
5253        );
5254    }
5255
5256    #[tokio::test]
5257    async fn test_various_arrow_types() {
5258        let (namespace, _temp_dir) = create_test_namespace().await;
5259
5260        // Create schema with various types
5261        let fields = vec![
5262            JsonArrowField {
5263                name: "bool_col".to_string(),
5264                r#type: Box::new(JsonArrowDataType::new("bool".to_string())),
5265                nullable: true,
5266                metadata: None,
5267            },
5268            JsonArrowField {
5269                name: "int8_col".to_string(),
5270                r#type: Box::new(JsonArrowDataType::new("int8".to_string())),
5271                nullable: true,
5272                metadata: None,
5273            },
5274            JsonArrowField {
5275                name: "float64_col".to_string(),
5276                r#type: Box::new(JsonArrowDataType::new("float64".to_string())),
5277                nullable: true,
5278                metadata: None,
5279            },
5280            JsonArrowField {
5281                name: "binary_col".to_string(),
5282                r#type: Box::new(JsonArrowDataType::new("binary".to_string())),
5283                nullable: true,
5284                metadata: None,
5285            },
5286        ];
5287
5288        let schema = JsonArrowSchema {
5289            fields,
5290            metadata: None,
5291        };
5292
5293        // Create IPC data
5294        let ipc_data = create_test_ipc_data(&schema);
5295
5296        let mut request = CreateTableRequest::new();
5297        request.id = Some(vec!["complex_table".to_string()]);
5298
5299        let response = namespace
5300            .create_table(request, bytes::Bytes::from(ipc_data))
5301            .await
5302            .unwrap();
5303
5304        assert!(response.location.is_some());
5305    }
5306
5307    #[tokio::test]
5308    async fn test_connect_dir() {
5309        let temp_dir = TempStdDir::default();
5310
5311        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5312            .build()
5313            .await
5314            .unwrap();
5315
5316        // Test basic operation through the concrete type
5317        let mut request = ListTablesRequest::new();
5318        request.id = Some(vec![]);
5319        let response = namespace.list_tables(request).await.unwrap();
5320        assert_eq!(response.tables.len(), 0);
5321    }
5322
5323    #[tokio::test]
5324    async fn test_create_table_with_ipc_data() {
5325        use arrow::array::{Int32Array, StringArray};
5326        use arrow::ipc::writer::StreamWriter;
5327
5328        let (namespace, _temp_dir) = create_test_namespace().await;
5329
5330        // Create a schema with some fields
5331        let schema = create_test_schema();
5332
5333        // Create some test data that matches the schema
5334        let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
5335        let arrow_schema = Arc::new(arrow_schema);
5336
5337        // Create a RecordBatch with actual data
5338        let id_array = Int32Array::from(vec![1, 2, 3]);
5339        let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
5340        let batch = arrow::record_batch::RecordBatch::try_new(
5341            arrow_schema.clone(),
5342            vec![Arc::new(id_array), Arc::new(name_array)],
5343        )
5344        .unwrap();
5345
5346        // Write the batch to an IPC stream
5347        let mut buffer = Vec::new();
5348        {
5349            let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
5350            writer.write(&batch).unwrap();
5351            writer.finish().unwrap();
5352        }
5353
5354        // Create table with the IPC data
5355        let mut request = CreateTableRequest::new();
5356        request.id = Some(vec!["test_table_with_data".to_string()]);
5357
5358        let response = namespace
5359            .create_table(request, Bytes::from(buffer))
5360            .await
5361            .unwrap();
5362
5363        assert_eq!(response.version, Some(1));
5364        assert!(
5365            response
5366                .location
5367                .unwrap()
5368                .contains("test_table_with_data.lance")
5369        );
5370
5371        // Verify table exists
5372        let mut exists_request = TableExistsRequest::new();
5373        exists_request.id = Some(vec!["test_table_with_data".to_string()]);
5374        namespace.table_exists(exists_request).await.unwrap();
5375    }
5376
5377    #[tokio::test]
5378    async fn test_child_namespace_create_and_list() {
5379        let (namespace, _temp_dir) = create_test_namespace().await;
5380
5381        // Create multiple child namespaces
5382        for i in 1..=3 {
5383            let mut create_req = CreateNamespaceRequest::new();
5384            create_req.id = Some(vec![format!("ns{}", i)]);
5385            let result = namespace.create_namespace(create_req).await;
5386            assert!(result.is_ok(), "Failed to create child namespace ns{}", i);
5387        }
5388
5389        // List child namespaces
5390        let list_req = ListNamespacesRequest {
5391            id: Some(vec![]),
5392            ..Default::default()
5393        };
5394        let result = namespace.list_namespaces(list_req).await;
5395        assert!(result.is_ok());
5396        let namespaces = result.unwrap().namespaces;
5397        assert_eq!(namespaces.len(), 3);
5398        assert!(namespaces.contains(&"ns1".to_string()));
5399        assert!(namespaces.contains(&"ns2".to_string()));
5400        assert!(namespaces.contains(&"ns3".to_string()));
5401    }
5402
5403    #[tokio::test]
5404    async fn test_nested_namespace_hierarchy() {
5405        let (namespace, _temp_dir) = create_test_namespace().await;
5406
5407        // Create parent namespace
5408        let mut create_req = CreateNamespaceRequest::new();
5409        create_req.id = Some(vec!["parent".to_string()]);
5410        namespace.create_namespace(create_req).await.unwrap();
5411
5412        // Create nested children
5413        let mut create_req = CreateNamespaceRequest::new();
5414        create_req.id = Some(vec!["parent".to_string(), "child1".to_string()]);
5415        namespace.create_namespace(create_req).await.unwrap();
5416
5417        let mut create_req = CreateNamespaceRequest::new();
5418        create_req.id = Some(vec!["parent".to_string(), "child2".to_string()]);
5419        namespace.create_namespace(create_req).await.unwrap();
5420
5421        // List children of parent
5422        let list_req = ListNamespacesRequest {
5423            id: Some(vec!["parent".to_string()]),
5424            ..Default::default()
5425        };
5426        let result = namespace.list_namespaces(list_req).await;
5427        assert!(result.is_ok());
5428        let children = result.unwrap().namespaces;
5429        assert_eq!(children.len(), 2);
5430        assert!(children.contains(&"child1".to_string()));
5431        assert!(children.contains(&"child2".to_string()));
5432
5433        // List root should only show parent
5434        let list_req = ListNamespacesRequest {
5435            id: Some(vec![]),
5436            ..Default::default()
5437        };
5438        let result = namespace.list_namespaces(list_req).await;
5439        assert!(result.is_ok());
5440        let root_namespaces = result.unwrap().namespaces;
5441        assert_eq!(root_namespaces.len(), 1);
5442        assert_eq!(root_namespaces[0], "parent");
5443    }
5444
5445    #[tokio::test]
5446    async fn test_table_in_child_namespace() {
5447        let (namespace, _temp_dir) = create_test_namespace().await;
5448
5449        // Create child namespace
5450        let mut create_ns_req = CreateNamespaceRequest::new();
5451        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5452        namespace.create_namespace(create_ns_req).await.unwrap();
5453
5454        // Create table in child namespace
5455        let schema = create_test_schema();
5456        let ipc_data = create_test_ipc_data(&schema);
5457        let mut create_table_req = CreateTableRequest::new();
5458        create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5459        let result = namespace
5460            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5461            .await;
5462        assert!(result.is_ok(), "Failed to create table in child namespace");
5463
5464        // List tables in child namespace
5465        let list_req = ListTablesRequest {
5466            id: Some(vec!["test_ns".to_string()]),
5467            ..Default::default()
5468        };
5469        let result = namespace.list_tables(list_req).await;
5470        assert!(result.is_ok());
5471        let tables = result.unwrap().tables;
5472        assert_eq!(tables.len(), 1);
5473        assert_eq!(tables[0], "table1");
5474
5475        // Verify table exists
5476        let mut exists_req = TableExistsRequest::new();
5477        exists_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5478        let result = namespace.table_exists(exists_req).await;
5479        assert!(result.is_ok());
5480
5481        // Describe table in child namespace
5482        let mut describe_req = DescribeTableRequest::new();
5483        describe_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5484        let result = namespace.describe_table(describe_req).await;
5485        assert!(result.is_ok());
5486        let response = result.unwrap();
5487        assert!(response.location.is_some());
5488    }
5489
5490    #[tokio::test]
5491    async fn test_multiple_tables_in_child_namespace() {
5492        let (namespace, _temp_dir) = create_test_namespace().await;
5493
5494        // Create child namespace
5495        let mut create_ns_req = CreateNamespaceRequest::new();
5496        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5497        namespace.create_namespace(create_ns_req).await.unwrap();
5498
5499        // Create multiple tables
5500        let schema = create_test_schema();
5501        let ipc_data = create_test_ipc_data(&schema);
5502        for i in 1..=3 {
5503            let mut create_table_req = CreateTableRequest::new();
5504            create_table_req.id = Some(vec!["test_ns".to_string(), format!("table{}", i)]);
5505            namespace
5506                .create_table(create_table_req, bytes::Bytes::from(ipc_data.clone()))
5507                .await
5508                .unwrap();
5509        }
5510
5511        // List tables
5512        let list_req = ListTablesRequest {
5513            id: Some(vec!["test_ns".to_string()]),
5514            ..Default::default()
5515        };
5516        let result = namespace.list_tables(list_req).await;
5517        assert!(result.is_ok());
5518        let tables = result.unwrap().tables;
5519        assert_eq!(tables.len(), 3);
5520        assert!(tables.contains(&"table1".to_string()));
5521        assert!(tables.contains(&"table2".to_string()));
5522        assert!(tables.contains(&"table3".to_string()));
5523    }
5524
5525    #[tokio::test]
5526    async fn test_drop_table_in_child_namespace() {
5527        let (namespace, _temp_dir) = create_test_namespace().await;
5528
5529        // Create child namespace
5530        let mut create_ns_req = CreateNamespaceRequest::new();
5531        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5532        namespace.create_namespace(create_ns_req).await.unwrap();
5533
5534        // Create table
5535        let schema = create_test_schema();
5536        let ipc_data = create_test_ipc_data(&schema);
5537        let mut create_table_req = CreateTableRequest::new();
5538        create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5539        namespace
5540            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5541            .await
5542            .unwrap();
5543
5544        // Drop table
5545        let mut drop_req = DropTableRequest::new();
5546        drop_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5547        let result = namespace.drop_table(drop_req).await;
5548        assert!(result.is_ok(), "Failed to drop table in child namespace");
5549
5550        // Verify table no longer exists
5551        let mut exists_req = TableExistsRequest::new();
5552        exists_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5553        let result = namespace.table_exists(exists_req).await;
5554        assert!(result.is_err());
5555    }
5556
5557    #[tokio::test]
5558    async fn test_deeply_nested_namespace() {
5559        let (namespace, _temp_dir) = create_test_namespace().await;
5560
5561        // Create deeply nested namespace hierarchy
5562        let mut create_req = CreateNamespaceRequest::new();
5563        create_req.id = Some(vec!["level1".to_string()]);
5564        namespace.create_namespace(create_req).await.unwrap();
5565
5566        let mut create_req = CreateNamespaceRequest::new();
5567        create_req.id = Some(vec!["level1".to_string(), "level2".to_string()]);
5568        namespace.create_namespace(create_req).await.unwrap();
5569
5570        let mut create_req = CreateNamespaceRequest::new();
5571        create_req.id = Some(vec![
5572            "level1".to_string(),
5573            "level2".to_string(),
5574            "level3".to_string(),
5575        ]);
5576        namespace.create_namespace(create_req).await.unwrap();
5577
5578        // Create table in deeply nested namespace
5579        let schema = create_test_schema();
5580        let ipc_data = create_test_ipc_data(&schema);
5581        let mut create_table_req = CreateTableRequest::new();
5582        create_table_req.id = Some(vec![
5583            "level1".to_string(),
5584            "level2".to_string(),
5585            "level3".to_string(),
5586            "table1".to_string(),
5587        ]);
5588        let result = namespace
5589            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5590            .await;
5591        assert!(
5592            result.is_ok(),
5593            "Failed to create table in deeply nested namespace"
5594        );
5595
5596        // Verify table exists
5597        let mut exists_req = TableExistsRequest::new();
5598        exists_req.id = Some(vec![
5599            "level1".to_string(),
5600            "level2".to_string(),
5601            "level3".to_string(),
5602            "table1".to_string(),
5603        ]);
5604        let result = namespace.table_exists(exists_req).await;
5605        assert!(result.is_ok());
5606    }
5607
5608    #[tokio::test]
5609    async fn test_namespace_with_properties() {
5610        let (namespace, _temp_dir) = create_test_namespace().await;
5611
5612        // Create namespace with properties
5613        let mut properties = HashMap::new();
5614        properties.insert("owner".to_string(), "test_user".to_string());
5615        properties.insert("description".to_string(), "Test namespace".to_string());
5616
5617        let mut create_req = CreateNamespaceRequest::new();
5618        create_req.id = Some(vec!["test_ns".to_string()]);
5619        create_req.properties = Some(properties.clone());
5620        namespace.create_namespace(create_req).await.unwrap();
5621
5622        // Describe namespace and verify properties
5623        let describe_req = DescribeNamespaceRequest {
5624            id: Some(vec!["test_ns".to_string()]),
5625            ..Default::default()
5626        };
5627        let result = namespace.describe_namespace(describe_req).await;
5628        assert!(result.is_ok());
5629        let response = result.unwrap();
5630        assert!(response.properties.is_some());
5631        let props = response.properties.unwrap();
5632        assert_eq!(props.get("owner"), Some(&"test_user".to_string()));
5633        assert_eq!(
5634            props.get("description"),
5635            Some(&"Test namespace".to_string())
5636        );
5637    }
5638
5639    #[tokio::test]
5640    async fn test_cannot_drop_namespace_with_tables() {
5641        let (namespace, _temp_dir) = create_test_namespace().await;
5642
5643        // Create namespace
5644        let mut create_ns_req = CreateNamespaceRequest::new();
5645        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5646        namespace.create_namespace(create_ns_req).await.unwrap();
5647
5648        // Create table in namespace
5649        let schema = create_test_schema();
5650        let ipc_data = create_test_ipc_data(&schema);
5651        let mut create_table_req = CreateTableRequest::new();
5652        create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5653        namespace
5654            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5655            .await
5656            .unwrap();
5657
5658        // Try to drop namespace - should fail
5659        let mut drop_req = DropNamespaceRequest::new();
5660        drop_req.id = Some(vec!["test_ns".to_string()]);
5661        let result = namespace.drop_namespace(drop_req).await;
5662        assert!(
5663            result.is_err(),
5664            "Should not be able to drop namespace with tables"
5665        );
5666    }
5667
5668    #[tokio::test]
5669    async fn test_isolation_between_namespaces() {
5670        let (namespace, _temp_dir) = create_test_namespace().await;
5671
5672        // Create two namespaces
5673        let mut create_req = CreateNamespaceRequest::new();
5674        create_req.id = Some(vec!["ns1".to_string()]);
5675        namespace.create_namespace(create_req).await.unwrap();
5676
5677        let mut create_req = CreateNamespaceRequest::new();
5678        create_req.id = Some(vec!["ns2".to_string()]);
5679        namespace.create_namespace(create_req).await.unwrap();
5680
5681        // Create table with same name in both namespaces
5682        let schema = create_test_schema();
5683        let ipc_data = create_test_ipc_data(&schema);
5684
5685        let mut create_table_req = CreateTableRequest::new();
5686        create_table_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5687        namespace
5688            .create_table(create_table_req, bytes::Bytes::from(ipc_data.clone()))
5689            .await
5690            .unwrap();
5691
5692        let mut create_table_req = CreateTableRequest::new();
5693        create_table_req.id = Some(vec!["ns2".to_string(), "table1".to_string()]);
5694        namespace
5695            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5696            .await
5697            .unwrap();
5698
5699        // List tables in each namespace
5700        let list_req = ListTablesRequest {
5701            id: Some(vec!["ns1".to_string()]),
5702            page_token: None,
5703            limit: None,
5704            ..Default::default()
5705        };
5706        let result = namespace.list_tables(list_req).await.unwrap();
5707        assert_eq!(result.tables.len(), 1);
5708        assert_eq!(result.tables[0], "table1");
5709
5710        let list_req = ListTablesRequest {
5711            id: Some(vec!["ns2".to_string()]),
5712            page_token: None,
5713            limit: None,
5714            ..Default::default()
5715        };
5716        let result = namespace.list_tables(list_req).await.unwrap();
5717        assert_eq!(result.tables.len(), 1);
5718        assert_eq!(result.tables[0], "table1");
5719
5720        // Drop table in ns1 shouldn't affect ns2
5721        let mut drop_req = DropTableRequest::new();
5722        drop_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5723        namespace.drop_table(drop_req).await.unwrap();
5724
5725        // Verify ns1 table is gone but ns2 table still exists
5726        let mut exists_req = TableExistsRequest::new();
5727        exists_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5728        assert!(namespace.table_exists(exists_req).await.is_err());
5729
5730        let mut exists_req = TableExistsRequest::new();
5731        exists_req.id = Some(vec!["ns2".to_string(), "table1".to_string()]);
5732        assert!(namespace.table_exists(exists_req).await.is_ok());
5733    }
5734
5735    #[tokio::test]
5736    async fn test_migrate_directory_tables() {
5737        let temp_dir = TempStdDir::default();
5738        let temp_path = temp_dir.to_str().unwrap();
5739
5740        // Step 1: Create tables in directory-only mode
5741        let dir_only_ns = DirectoryNamespaceBuilder::new(temp_path)
5742            .manifest_enabled(false)
5743            .dir_listing_enabled(true)
5744            .build()
5745            .await
5746            .unwrap();
5747
5748        // Create some tables
5749        let schema = create_test_schema();
5750        let ipc_data = create_test_ipc_data(&schema);
5751
5752        for i in 1..=3 {
5753            let mut create_req = CreateTableRequest::new();
5754            create_req.id = Some(vec![format!("table{}", i)]);
5755            dir_only_ns
5756                .create_table(create_req, bytes::Bytes::from(ipc_data.clone()))
5757                .await
5758                .unwrap();
5759        }
5760
5761        drop(dir_only_ns);
5762
5763        // Step 2: Create namespace with dual mode (manifest + directory listing)
5764        let dual_mode_ns = DirectoryNamespaceBuilder::new(temp_path)
5765            .manifest_enabled(true)
5766            .dir_listing_enabled(true)
5767            .build()
5768            .await
5769            .unwrap();
5770
5771        // Before migration, tables should be visible (via directory listing fallback)
5772        let mut list_req = ListTablesRequest::new();
5773        list_req.id = Some(vec![]);
5774        let tables = dual_mode_ns.list_tables(list_req).await.unwrap().tables;
5775        assert_eq!(tables.len(), 3);
5776
5777        // Run migration
5778        let migrated_count = dual_mode_ns.migrate().await.unwrap();
5779        assert_eq!(migrated_count, 3, "Should migrate all 3 tables");
5780
5781        // Verify tables are now in manifest
5782        let mut list_req = ListTablesRequest::new();
5783        list_req.id = Some(vec![]);
5784        let tables = dual_mode_ns.list_tables(list_req).await.unwrap().tables;
5785        assert_eq!(tables.len(), 3);
5786
5787        // Run migration again - should be idempotent
5788        let migrated_count = dual_mode_ns.migrate().await.unwrap();
5789        assert_eq!(
5790            migrated_count, 0,
5791            "Should not migrate already-migrated tables"
5792        );
5793
5794        drop(dual_mode_ns);
5795
5796        // Step 3: Create namespace with manifest-only mode
5797        let manifest_only_ns = DirectoryNamespaceBuilder::new(temp_path)
5798            .manifest_enabled(true)
5799            .dir_listing_enabled(false)
5800            .build()
5801            .await
5802            .unwrap();
5803
5804        // Tables should still be accessible (now from manifest only)
5805        let mut list_req = ListTablesRequest::new();
5806        list_req.id = Some(vec![]);
5807        let tables = manifest_only_ns.list_tables(list_req).await.unwrap().tables;
5808        assert_eq!(tables.len(), 3);
5809        assert!(tables.contains(&"table1".to_string()));
5810        assert!(tables.contains(&"table2".to_string()));
5811        assert!(tables.contains(&"table3".to_string()));
5812    }
5813
5814    #[tokio::test]
5815    async fn test_migrate_without_manifest() {
5816        let temp_dir = TempStdDir::default();
5817        let temp_path = temp_dir.to_str().unwrap();
5818
5819        // Create namespace without manifest
5820        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5821            .manifest_enabled(false)
5822            .dir_listing_enabled(true)
5823            .build()
5824            .await
5825            .unwrap();
5826
5827        // migrate() should return 0 when manifest is not enabled
5828        let migrated_count = namespace.migrate().await.unwrap();
5829        assert_eq!(migrated_count, 0);
5830    }
5831
5832    #[tokio::test]
5833    async fn test_register_table() {
5834        use lance_namespace::models::{RegisterTableRequest, TableExistsRequest};
5835
5836        let temp_dir = TempStdDir::default();
5837        let temp_path = temp_dir.to_str().unwrap();
5838
5839        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5840            .dir_listing_to_manifest_migration_enabled(true)
5841            .build()
5842            .await
5843            .unwrap();
5844
5845        // Create a physical table first using lance directly
5846        let schema = create_test_schema();
5847        let ipc_data = create_test_ipc_data(&schema);
5848
5849        let table_uri = format!("{}/external_table.lance", temp_path);
5850        let cursor = Cursor::new(ipc_data);
5851        let stream_reader = StreamReader::try_new(cursor, None).unwrap();
5852        let batches: Vec<_> = stream_reader
5853            .collect::<std::result::Result<Vec<_>, _>>()
5854            .unwrap();
5855        let schema = batches[0].schema();
5856        let batch_results: Vec<_> = batches.into_iter().map(Ok).collect();
5857        let reader = RecordBatchIterator::new(batch_results, schema);
5858        Dataset::write(Box::new(reader), &table_uri, None)
5859            .await
5860            .unwrap();
5861
5862        // Register the table
5863        let mut register_req = RegisterTableRequest::new("external_table.lance".to_string());
5864        register_req.id = Some(vec!["registered_table".to_string()]);
5865
5866        let response = namespace.register_table(register_req).await.unwrap();
5867        assert_eq!(response.location, Some("external_table.lance".to_string()));
5868
5869        // Verify table exists in namespace
5870        let mut exists_req = TableExistsRequest::new();
5871        exists_req.id = Some(vec!["registered_table".to_string()]);
5872        assert!(namespace.table_exists(exists_req).await.is_ok());
5873
5874        // Verify we can list the table
5875        let mut list_req = ListTablesRequest::new();
5876        list_req.id = Some(vec![]);
5877        let tables = namespace.list_tables(list_req).await.unwrap();
5878        assert!(tables.tables.contains(&"registered_table".to_string()));
5879    }
5880
5881    #[tokio::test]
5882    async fn test_register_table_duplicate_fails() {
5883        use lance_namespace::models::RegisterTableRequest;
5884
5885        let temp_dir = TempStdDir::default();
5886        let temp_path = temp_dir.to_str().unwrap();
5887
5888        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5889            .build()
5890            .await
5891            .unwrap();
5892
5893        // Register a table
5894        let mut register_req = RegisterTableRequest::new("test_table.lance".to_string());
5895        register_req.id = Some(vec!["test_table".to_string()]);
5896
5897        namespace
5898            .register_table(register_req.clone())
5899            .await
5900            .unwrap();
5901
5902        // Try to register again - should fail
5903        let result = namespace.register_table(register_req).await;
5904        assert!(result.is_err());
5905        assert!(result.unwrap_err().to_string().contains("already exists"));
5906    }
5907
5908    #[tokio::test]
5909    async fn test_deregister_table() {
5910        use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
5911
5912        let temp_dir = TempStdDir::default();
5913        let temp_path = temp_dir.to_str().unwrap();
5914
5915        // Create namespace with manifest-only mode (no directory listing fallback)
5916        // This ensures deregistered tables are truly invisible
5917        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5918            .manifest_enabled(true)
5919            .dir_listing_enabled(false)
5920            .build()
5921            .await
5922            .unwrap();
5923
5924        // Create a table
5925        let schema = create_test_schema();
5926        let ipc_data = create_test_ipc_data(&schema);
5927
5928        let mut create_req = CreateTableRequest::new();
5929        create_req.id = Some(vec!["test_table".to_string()]);
5930        namespace
5931            .create_table(create_req, bytes::Bytes::from(ipc_data))
5932            .await
5933            .unwrap();
5934
5935        // Verify table exists
5936        let mut exists_req = TableExistsRequest::new();
5937        exists_req.id = Some(vec!["test_table".to_string()]);
5938        assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
5939
5940        // Deregister the table
5941        let mut deregister_req = DeregisterTableRequest::new();
5942        deregister_req.id = Some(vec!["test_table".to_string()]);
5943        let response = namespace.deregister_table(deregister_req).await.unwrap();
5944
5945        // Should return location and id
5946        assert!(
5947            response.location.is_some(),
5948            "Deregister should return location"
5949        );
5950        let location = response.location.as_ref().unwrap();
5951        // Location should be a proper file:// URI with the temp path
5952        // Use uri_to_url to normalize the temp path to a URL for comparison
5953        let expected_url = lance_io::object_store::uri_to_url(temp_path)
5954            .expect("Failed to convert temp path to URL");
5955        let expected_prefix = expected_url.to_string();
5956        assert!(
5957            location.starts_with(&expected_prefix),
5958            "Location should start with '{}', got: {}",
5959            expected_prefix,
5960            location
5961        );
5962        assert!(
5963            location.contains("test_table"),
5964            "Location should contain table name: {}",
5965            location
5966        );
5967        assert_eq!(response.id, Some(vec!["test_table".to_string()]));
5968
5969        // Verify table no longer exists in namespace (removed from manifest)
5970        assert!(namespace.table_exists(exists_req).await.is_err());
5971
5972        // Verify physical data still exists at the returned location
5973        let dataset = Dataset::open(location).await;
5974        assert!(
5975            dataset.is_ok(),
5976            "Physical table data should still exist at {}",
5977            location
5978        );
5979    }
5980
5981    #[tokio::test]
5982    async fn test_deregister_table_in_child_namespace() {
5983        use lance_namespace::models::{
5984            CreateNamespaceRequest, DeregisterTableRequest, TableExistsRequest,
5985        };
5986
5987        let temp_dir = TempStdDir::default();
5988        let temp_path = temp_dir.to_str().unwrap();
5989
5990        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5991            .build()
5992            .await
5993            .unwrap();
5994
5995        // Create child namespace
5996        let mut create_ns_req = CreateNamespaceRequest::new();
5997        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5998        namespace.create_namespace(create_ns_req).await.unwrap();
5999
6000        // Create a table in the child namespace
6001        let schema = create_test_schema();
6002        let ipc_data = create_test_ipc_data(&schema);
6003
6004        let mut create_req = CreateTableRequest::new();
6005        create_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6006        namespace
6007            .create_table(create_req, bytes::Bytes::from(ipc_data))
6008            .await
6009            .unwrap();
6010
6011        // Deregister the table
6012        let mut deregister_req = DeregisterTableRequest::new();
6013        deregister_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6014        let response = namespace.deregister_table(deregister_req).await.unwrap();
6015
6016        // Should return location and id in child namespace
6017        assert!(
6018            response.location.is_some(),
6019            "Deregister should return location"
6020        );
6021        let location = response.location.as_ref().unwrap();
6022        // Location should be a proper file:// URI with the temp path
6023        // Use uri_to_url to normalize the temp path to a URL for comparison
6024        let expected_url = lance_io::object_store::uri_to_url(temp_path)
6025            .expect("Failed to convert temp path to URL");
6026        let expected_prefix = expected_url.to_string();
6027        assert!(
6028            location.starts_with(&expected_prefix),
6029            "Location should start with '{}', got: {}",
6030            expected_prefix,
6031            location
6032        );
6033        assert!(
6034            location.contains("test_ns") && location.contains("test_table"),
6035            "Location should contain namespace and table name: {}",
6036            location
6037        );
6038        assert_eq!(
6039            response.id,
6040            Some(vec!["test_ns".to_string(), "test_table".to_string()])
6041        );
6042
6043        // Verify table no longer exists
6044        let mut exists_req = TableExistsRequest::new();
6045        exists_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6046        assert!(namespace.table_exists(exists_req).await.is_err());
6047    }
6048
6049    #[tokio::test]
6050    async fn test_register_without_manifest_fails() {
6051        use lance_namespace::models::RegisterTableRequest;
6052
6053        let temp_dir = TempStdDir::default();
6054        let temp_path = temp_dir.to_str().unwrap();
6055
6056        // Create namespace without manifest
6057        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6058            .manifest_enabled(false)
6059            .build()
6060            .await
6061            .unwrap();
6062
6063        // Try to register - should fail (register requires manifest)
6064        let mut register_req = RegisterTableRequest::new("test_table.lance".to_string());
6065        register_req.id = Some(vec!["test_table".to_string()]);
6066        let result = namespace.register_table(register_req).await;
6067        assert!(result.is_err());
6068        assert!(
6069            result
6070                .unwrap_err()
6071                .to_string()
6072                .contains("manifest mode is enabled")
6073        );
6074
6075        // Note: deregister_table now works in V1 mode via .lance-deregistered marker files
6076        // See test_deregister_table_v1_mode for that test case
6077    }
6078
6079    #[tokio::test]
6080    async fn test_register_table_rejects_absolute_uri() {
6081        use lance_namespace::models::RegisterTableRequest;
6082
6083        let temp_dir = TempStdDir::default();
6084        let temp_path = temp_dir.to_str().unwrap();
6085
6086        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6087            .build()
6088            .await
6089            .unwrap();
6090
6091        // Try to register with absolute URI - should fail
6092        let mut register_req = RegisterTableRequest::new("s3://bucket/table.lance".to_string());
6093        register_req.id = Some(vec!["test_table".to_string()]);
6094        let result = namespace.register_table(register_req).await;
6095        assert!(result.is_err());
6096        let err_msg = result.unwrap_err().to_string();
6097        assert!(err_msg.contains("Absolute URIs are not allowed"));
6098    }
6099
6100    #[tokio::test]
6101    async fn test_register_table_rejects_absolute_path() {
6102        use lance_namespace::models::RegisterTableRequest;
6103
6104        let temp_dir = TempStdDir::default();
6105        let temp_path = temp_dir.to_str().unwrap();
6106
6107        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6108            .build()
6109            .await
6110            .unwrap();
6111
6112        // Try to register with absolute path - should fail
6113        let mut register_req = RegisterTableRequest::new("/tmp/table.lance".to_string());
6114        register_req.id = Some(vec!["test_table".to_string()]);
6115        let result = namespace.register_table(register_req).await;
6116        assert!(result.is_err());
6117        let err_msg = result.unwrap_err().to_string();
6118        assert!(err_msg.contains("Absolute paths are not allowed"));
6119    }
6120
6121    #[tokio::test]
6122    async fn test_register_table_rejects_path_traversal() {
6123        use lance_namespace::models::RegisterTableRequest;
6124
6125        let temp_dir = TempStdDir::default();
6126        let temp_path = temp_dir.to_str().unwrap();
6127
6128        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6129            .build()
6130            .await
6131            .unwrap();
6132
6133        // Try to register with path traversal - should fail
6134        let mut register_req = RegisterTableRequest::new("../outside/table.lance".to_string());
6135        register_req.id = Some(vec!["test_table".to_string()]);
6136        let result = namespace.register_table(register_req).await;
6137        assert!(result.is_err());
6138        let err_msg = result.unwrap_err().to_string();
6139        assert!(err_msg.contains("Path traversal is not allowed"));
6140    }
6141
6142    #[tokio::test]
6143    async fn test_namespace_write() {
6144        use arrow::array::Int32Array;
6145        use arrow::datatypes::{DataType, Field as ArrowField, Schema as ArrowSchema};
6146        use arrow::record_batch::{RecordBatch, RecordBatchIterator};
6147        use lance::dataset::{Dataset, WriteMode, WriteParams};
6148        use lance_namespace::LanceNamespace;
6149
6150        let (namespace, _temp_dir) = create_test_namespace().await;
6151        let namespace = Arc::new(namespace) as Arc<dyn LanceNamespace>;
6152
6153        // Use child namespace instead of root
6154        let table_id = vec!["test_ns".to_string(), "test_table".to_string()];
6155        let schema = Arc::new(ArrowSchema::new(vec![
6156            ArrowField::new("a", DataType::Int32, false),
6157            ArrowField::new("b", DataType::Int32, false),
6158        ]));
6159
6160        // Test 1: CREATE mode
6161        let data1 = RecordBatch::try_new(
6162            schema.clone(),
6163            vec![
6164                Arc::new(Int32Array::from(vec![1, 2, 3])),
6165                Arc::new(Int32Array::from(vec![10, 20, 30])),
6166            ],
6167        )
6168        .unwrap();
6169
6170        let reader1 = RecordBatchIterator::new(vec![data1].into_iter().map(Ok), schema.clone());
6171        let dataset =
6172            Dataset::write_into_namespace(reader1, namespace.clone(), table_id.clone(), None)
6173                .await
6174                .unwrap();
6175
6176        assert_eq!(dataset.count_rows(None).await.unwrap(), 3);
6177        assert_eq!(dataset.version().version, 1);
6178
6179        // Test 2: APPEND mode
6180        let data2 = RecordBatch::try_new(
6181            schema.clone(),
6182            vec![
6183                Arc::new(Int32Array::from(vec![4, 5])),
6184                Arc::new(Int32Array::from(vec![40, 50])),
6185            ],
6186        )
6187        .unwrap();
6188
6189        let params_append = WriteParams {
6190            mode: WriteMode::Append,
6191            ..Default::default()
6192        };
6193
6194        let reader2 = RecordBatchIterator::new(vec![data2].into_iter().map(Ok), schema.clone());
6195        let dataset = Dataset::write_into_namespace(
6196            reader2,
6197            namespace.clone(),
6198            table_id.clone(),
6199            Some(params_append),
6200        )
6201        .await
6202        .unwrap();
6203
6204        assert_eq!(dataset.count_rows(None).await.unwrap(), 5);
6205        assert_eq!(dataset.version().version, 2);
6206
6207        // Test 3: OVERWRITE mode
6208        let data3 = RecordBatch::try_new(
6209            schema.clone(),
6210            vec![
6211                Arc::new(Int32Array::from(vec![100, 200])),
6212                Arc::new(Int32Array::from(vec![1000, 2000])),
6213            ],
6214        )
6215        .unwrap();
6216
6217        let params_overwrite = WriteParams {
6218            mode: WriteMode::Overwrite,
6219            ..Default::default()
6220        };
6221
6222        let reader3 = RecordBatchIterator::new(vec![data3].into_iter().map(Ok), schema.clone());
6223        let dataset = Dataset::write_into_namespace(
6224            reader3,
6225            namespace.clone(),
6226            table_id.clone(),
6227            Some(params_overwrite),
6228        )
6229        .await
6230        .unwrap();
6231
6232        assert_eq!(dataset.count_rows(None).await.unwrap(), 2);
6233        assert_eq!(dataset.version().version, 3);
6234
6235        // Verify old data was replaced
6236        let result = dataset.scan().try_into_batch().await.unwrap();
6237        let a_col = result
6238            .column_by_name("a")
6239            .unwrap()
6240            .as_any()
6241            .downcast_ref::<Int32Array>()
6242            .unwrap();
6243        assert_eq!(a_col.values(), &[100, 200]);
6244    }
6245
6246    // ============================================================
6247    // Tests for declare_table
6248    // ============================================================
6249
6250    #[tokio::test]
6251    async fn test_declare_table_v1_mode() {
6252        use lance_namespace::models::{
6253            DeclareTableRequest, DescribeTableRequest, ListTablesRequest, TableExistsRequest,
6254        };
6255
6256        let temp_dir = TempStdDir::default();
6257        let temp_path = temp_dir.to_str().unwrap();
6258
6259        // Create namespace in V1 mode (no manifest)
6260        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6261            .manifest_enabled(false)
6262            .build()
6263            .await
6264            .unwrap();
6265
6266        // Declare a table
6267        let mut declare_req = DeclareTableRequest::new();
6268        declare_req.id = Some(vec!["test_table".to_string()]);
6269        let response = namespace.declare_table(declare_req).await.unwrap();
6270
6271        // Should return location
6272        assert!(response.location.is_some());
6273        let location = response.location.as_ref().unwrap();
6274        assert!(location.ends_with("test_table.lance"));
6275
6276        // Table should exist (via reserved file)
6277        let mut exists_req = TableExistsRequest::new();
6278        exists_req.id = Some(vec!["test_table".to_string()]);
6279        assert!(namespace.table_exists(exists_req).await.is_ok());
6280
6281        // Describe should work but return no version/schema (not written yet)
6282        let mut describe_req = DescribeTableRequest::new();
6283        describe_req.id = Some(vec!["test_table".to_string()]);
6284        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6285        assert!(describe_response.location.is_some());
6286        assert!(describe_response.version.is_none()); // Not written yet
6287        assert!(describe_response.schema.is_none()); // Not written yet
6288        assert_eq!(describe_response.is_only_declared, None);
6289
6290        let mut describe_req = DescribeTableRequest::new();
6291        describe_req.id = Some(vec!["test_table".to_string()]);
6292        describe_req.check_declared = Some(true);
6293        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6294        assert_eq!(describe_response.is_only_declared, Some(true));
6295
6296        let mut list_req = ListTablesRequest::new();
6297        list_req.id = Some(vec![]);
6298        let list_response = namespace.list_tables(list_req.clone()).await.unwrap();
6299        assert_eq!(list_response.tables, vec!["test_table".to_string()]);
6300
6301        list_req.include_declared = Some(false);
6302        let list_response = namespace.list_tables(list_req).await.unwrap();
6303        assert!(list_response.tables.is_empty());
6304    }
6305
6306    #[tokio::test]
6307    async fn test_insert_into_declared_table_promotes_it_from_declared_state() {
6308        use lance_namespace::models::{
6309            DeclareTableRequest, DescribeTableRequest, InsertIntoTableRequest,
6310        };
6311
6312        let temp_dir = TempStdDir::default();
6313        let temp_path = temp_dir.to_str().unwrap();
6314
6315        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6316            .manifest_enabled(false)
6317            .build()
6318            .await
6319            .unwrap();
6320
6321        let mut declare_req = DeclareTableRequest::new();
6322        declare_req.id = Some(vec!["test_table".to_string()]);
6323        namespace.declare_table(declare_req).await.unwrap();
6324
6325        let schema = create_test_schema();
6326        let ipc_data = create_test_ipc_data(&schema);
6327        let mut insert_req = InsertIntoTableRequest::new();
6328        insert_req.id = Some(vec!["test_table".to_string()]);
6329        namespace
6330            .insert_into_table(insert_req, bytes::Bytes::from(ipc_data))
6331            .await
6332            .unwrap();
6333
6334        let mut describe_req = DescribeTableRequest::new();
6335        describe_req.id = Some(vec!["test_table".to_string()]);
6336        describe_req.load_detailed_metadata = Some(true);
6337        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6338
6339        assert_eq!(describe_response.is_only_declared, Some(false));
6340        assert_eq!(describe_response.version, Some(1));
6341        assert!(describe_response.schema.is_some());
6342
6343        let mut list_req = ListTablesRequest::new();
6344        list_req.id = Some(vec![]);
6345        list_req.include_declared = Some(false);
6346        assert_eq!(
6347            namespace.list_tables(list_req).await.unwrap().tables,
6348            vec!["test_table".to_string()]
6349        );
6350    }
6351
6352    #[tokio::test]
6353    async fn test_create_table_after_declare_table_v1_mode_creates_table() {
6354        use lance_namespace::models::{
6355            DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6356        };
6357
6358        let temp_dir = TempStdDir::default();
6359        let temp_path = temp_dir.to_str().unwrap();
6360
6361        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6362            .manifest_enabled(false)
6363            .build()
6364            .await
6365            .unwrap();
6366
6367        let mut declare_req = DeclareTableRequest::new();
6368        declare_req.id = Some(vec!["test_table".to_string()]);
6369        namespace.declare_table(declare_req).await.unwrap();
6370
6371        let mut create_req = CreateTableRequest::new();
6372        create_req.id = Some(vec!["test_table".to_string()]);
6373        let response = namespace
6374            .create_table(
6375                create_req,
6376                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6377            )
6378            .await
6379            .unwrap();
6380
6381        assert_eq!(response.version, Some(1));
6382
6383        let mut describe_req = DescribeTableRequest::new();
6384        describe_req.id = Some(vec!["test_table".to_string()]);
6385        describe_req.load_detailed_metadata = Some(true);
6386        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6387        assert_eq!(describe_response.is_only_declared, Some(false));
6388        assert_eq!(describe_response.version, Some(1));
6389
6390        let mut list_req = ListTablesRequest::new();
6391        list_req.id = Some(vec![]);
6392        list_req.include_declared = Some(false);
6393        assert_eq!(
6394            namespace.list_tables(list_req).await.unwrap().tables,
6395            vec!["test_table".to_string()]
6396        );
6397    }
6398
6399    #[tokio::test]
6400    async fn test_insert_into_declared_table_with_manifest_promotes_it() {
6401        use lance_namespace::models::{
6402            DeclareTableRequest, DescribeTableRequest, InsertIntoTableRequest, ListTablesRequest,
6403        };
6404
6405        let temp_dir = TempStdDir::default();
6406        let temp_path = temp_dir.to_str().unwrap();
6407
6408        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6409            .manifest_enabled(true)
6410            .dir_listing_enabled(false)
6411            .build()
6412            .await
6413            .unwrap();
6414
6415        let mut declare_req = DeclareTableRequest::new();
6416        declare_req.id = Some(vec!["test_table".to_string()]);
6417        namespace.declare_table(declare_req).await.unwrap();
6418
6419        let mut insert_req = InsertIntoTableRequest::new();
6420        insert_req.id = Some(vec!["test_table".to_string()]);
6421        namespace
6422            .insert_into_table(
6423                insert_req,
6424                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6425            )
6426            .await
6427            .unwrap();
6428
6429        let mut describe_req = DescribeTableRequest::new();
6430        describe_req.id = Some(vec!["test_table".to_string()]);
6431        describe_req.load_detailed_metadata = Some(true);
6432        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6433        assert_eq!(describe_response.is_only_declared, Some(false));
6434        assert_eq!(describe_response.version, Some(1));
6435
6436        let mut list_req = ListTablesRequest::new();
6437        list_req.id = Some(vec![]);
6438        list_req.include_declared = Some(false);
6439        assert_eq!(
6440            namespace.list_tables(list_req).await.unwrap().tables,
6441            vec!["test_table".to_string()]
6442        );
6443    }
6444
6445    #[tokio::test]
6446    async fn test_create_table_after_declare_table_with_manifest_creates_table() {
6447        use lance_namespace::models::{
6448            CreateTableRequest, DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6449        };
6450
6451        let temp_dir = TempStdDir::default();
6452        let temp_path = temp_dir.to_str().unwrap();
6453
6454        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6455            .manifest_enabled(true)
6456            .dir_listing_enabled(false)
6457            .build()
6458            .await
6459            .unwrap();
6460
6461        let mut declare_req = DeclareTableRequest::new();
6462        declare_req.id = Some(vec!["test_table".to_string()]);
6463        declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6464        namespace.declare_table(declare_req).await.unwrap();
6465
6466        let mut create_req = CreateTableRequest::new();
6467        create_req.id = Some(vec!["test_table".to_string()]);
6468        create_req.mode = Some("Overwrite".to_string());
6469        let response = namespace
6470            .create_table(
6471                create_req,
6472                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6473            )
6474            .await
6475            .unwrap();
6476
6477        assert_eq!(response.version, Some(1));
6478        assert_eq!(
6479            response
6480                .properties
6481                .as_ref()
6482                .and_then(|properties| properties.get("owner")),
6483            Some(&"alice".to_string())
6484        );
6485
6486        let mut describe_req = DescribeTableRequest::new();
6487        describe_req.id = Some(vec!["test_table".to_string()]);
6488        describe_req.load_detailed_metadata = Some(true);
6489        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6490        assert_eq!(describe_response.is_only_declared, Some(false));
6491        assert_eq!(describe_response.version, Some(1));
6492        assert_eq!(
6493            describe_response
6494                .properties
6495                .as_ref()
6496                .and_then(|properties| properties.get("owner")),
6497            Some(&"alice".to_string())
6498        );
6499
6500        let mut list_req = ListTablesRequest::new();
6501        list_req.id = Some(vec![]);
6502        list_req.include_declared = Some(false);
6503        assert_eq!(
6504            namespace.list_tables(list_req).await.unwrap().tables,
6505            vec!["test_table".to_string()]
6506        );
6507    }
6508
6509    #[tokio::test]
6510    async fn test_create_table_after_declare_table_with_manifest_rejects_new_properties() {
6511        use lance_namespace::models::{CreateTableRequest, DeclareTableRequest};
6512
6513        let temp_dir = TempStdDir::default();
6514        let temp_path = temp_dir.to_str().unwrap();
6515
6516        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6517            .manifest_enabled(true)
6518            .dir_listing_enabled(false)
6519            .build()
6520            .await
6521            .unwrap();
6522
6523        let mut declare_req = DeclareTableRequest::new();
6524        declare_req.id = Some(vec!["test_table".to_string()]);
6525        declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6526        namespace.declare_table(declare_req).await.unwrap();
6527
6528        let mut create_req = CreateTableRequest::new();
6529        create_req.id = Some(vec!["test_table".to_string()]);
6530        create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6531
6532        let result = namespace
6533            .create_table(
6534                create_req,
6535                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6536            )
6537            .await;
6538
6539        assert!(result.is_err());
6540        assert!(
6541            result
6542                .unwrap_err()
6543                .to_string()
6544                .contains("cannot set properties for already declared table")
6545        );
6546    }
6547
6548    #[tokio::test]
6549    async fn test_create_table_with_manifest_exist_ok_keeps_existing_table() {
6550        use lance_namespace::models::{CreateTableRequest, DescribeTableRequest};
6551
6552        let temp_dir = TempStdDir::default();
6553        let temp_path = temp_dir.to_str().unwrap();
6554
6555        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6556            .manifest_enabled(true)
6557            .dir_listing_enabled(false)
6558            .build()
6559            .await
6560            .unwrap();
6561
6562        let mut create_req = CreateTableRequest::new();
6563        create_req.id = Some(vec!["test_table".to_string()]);
6564        create_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6565        namespace
6566            .create_table(
6567                create_req,
6568                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6569            )
6570            .await
6571            .unwrap();
6572
6573        let mut create_req = CreateTableRequest::new();
6574        create_req.id = Some(vec!["test_table".to_string()]);
6575        create_req.mode = Some("ExistOk".to_string());
6576        create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6577        let response = namespace
6578            .create_table(
6579                create_req,
6580                bytes::Bytes::from(create_single_row_test_ipc_data()),
6581            )
6582            .await
6583            .unwrap();
6584
6585        assert_eq!(
6586            response
6587                .properties
6588                .as_ref()
6589                .and_then(|properties| properties.get("owner")),
6590            Some(&"alice".to_string())
6591        );
6592        assert_eq!(
6593            open_dataset(&namespace, "test_table")
6594                .await
6595                .count_rows(None)
6596                .await
6597                .unwrap(),
6598            2
6599        );
6600
6601        let mut describe_req = DescribeTableRequest::new();
6602        describe_req.id = Some(vec!["test_table".to_string()]);
6603        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6604        assert_eq!(
6605            describe_response
6606                .properties
6607                .as_ref()
6608                .and_then(|properties| properties.get("owner")),
6609            Some(&"alice".to_string())
6610        );
6611    }
6612
6613    #[tokio::test]
6614    async fn test_create_table_with_manifest_overwrite_replaces_existing_table() {
6615        use lance_namespace::models::{CreateTableRequest, DescribeTableRequest};
6616
6617        let temp_dir = TempStdDir::default();
6618        let temp_path = temp_dir.to_str().unwrap();
6619
6620        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6621            .manifest_enabled(true)
6622            .dir_listing_enabled(false)
6623            .build()
6624            .await
6625            .unwrap();
6626
6627        let mut create_req = CreateTableRequest::new();
6628        create_req.id = Some(vec!["test_table".to_string()]);
6629        create_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6630        namespace
6631            .create_table(
6632                create_req,
6633                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6634            )
6635            .await
6636            .unwrap();
6637
6638        let mut create_req = CreateTableRequest::new();
6639        create_req.id = Some(vec!["test_table".to_string()]);
6640        create_req.mode = Some("overwrite".to_string());
6641        create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6642        let response = namespace
6643            .create_table(
6644                create_req,
6645                bytes::Bytes::from(create_single_row_test_ipc_data()),
6646            )
6647            .await
6648            .unwrap();
6649
6650        assert_eq!(response.version, Some(2));
6651        assert_eq!(
6652            response
6653                .properties
6654                .as_ref()
6655                .and_then(|properties| properties.get("owner")),
6656            Some(&"bob".to_string())
6657        );
6658        assert_eq!(
6659            open_dataset(&namespace, "test_table")
6660                .await
6661                .count_rows(None)
6662                .await
6663                .unwrap(),
6664            1
6665        );
6666
6667        let mut describe_req = DescribeTableRequest::new();
6668        describe_req.id = Some(vec!["test_table".to_string()]);
6669        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6670        assert_eq!(
6671            describe_response
6672                .properties
6673                .as_ref()
6674                .and_then(|properties| properties.get("owner")),
6675            Some(&"bob".to_string())
6676        );
6677    }
6678
6679    #[tokio::test]
6680    async fn test_create_table_with_manifest_invalid_mode_rejected() {
6681        use lance_namespace::models::CreateTableRequest;
6682
6683        let temp_dir = TempStdDir::default();
6684        let temp_path = temp_dir.to_str().unwrap();
6685
6686        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6687            .manifest_enabled(true)
6688            .dir_listing_enabled(false)
6689            .build()
6690            .await
6691            .unwrap();
6692
6693        let mut create_req = CreateTableRequest::new();
6694        create_req.id = Some(vec!["test_table".to_string()]);
6695        create_req.mode = Some("append".to_string());
6696        let result = namespace
6697            .create_table(
6698                create_req,
6699                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6700            )
6701            .await;
6702
6703        assert!(result.is_err());
6704        assert!(
6705            result
6706                .unwrap_err()
6707                .to_string()
6708                .contains("Unsupported create_table mode")
6709        );
6710    }
6711
6712    #[tokio::test]
6713    async fn test_merge_insert_into_declared_table_v1_mode_creates_table() {
6714        use lance_namespace::models::{
6715            DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6716            MergeInsertIntoTableRequest,
6717        };
6718
6719        let temp_dir = TempStdDir::default();
6720        let temp_path = temp_dir.to_str().unwrap();
6721
6722        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6723            .manifest_enabled(false)
6724            .build()
6725            .await
6726            .unwrap();
6727
6728        let mut declare_req = DeclareTableRequest::new();
6729        declare_req.id = Some(vec!["test_table".to_string()]);
6730        namespace.declare_table(declare_req).await.unwrap();
6731
6732        let mut merge_req = MergeInsertIntoTableRequest::new();
6733        merge_req.id = Some(vec!["test_table".to_string()]);
6734        merge_req.on = Some("id".to_string());
6735        let response = namespace
6736            .merge_insert_into_table(
6737                merge_req,
6738                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6739            )
6740            .await
6741            .unwrap();
6742
6743        assert_eq!(response.num_inserted_rows, Some(2));
6744        assert_eq!(response.num_updated_rows, Some(0));
6745
6746        let mut describe_req = DescribeTableRequest::new();
6747        describe_req.id = Some(vec!["test_table".to_string()]);
6748        describe_req.load_detailed_metadata = Some(true);
6749        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6750        assert_eq!(describe_response.is_only_declared, Some(false));
6751        assert_eq!(describe_response.version, Some(1));
6752
6753        let mut list_req = ListTablesRequest::new();
6754        list_req.id = Some(vec![]);
6755        list_req.include_declared = Some(false);
6756        assert_eq!(
6757            namespace.list_tables(list_req).await.unwrap().tables,
6758            vec!["test_table".to_string()]
6759        );
6760    }
6761
6762    #[tokio::test]
6763    async fn test_merge_insert_into_declared_table_with_manifest_creates_table() {
6764        use lance_namespace::models::{
6765            DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6766            MergeInsertIntoTableRequest,
6767        };
6768
6769        let temp_dir = TempStdDir::default();
6770        let temp_path = temp_dir.to_str().unwrap();
6771
6772        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6773            .manifest_enabled(true)
6774            .dir_listing_enabled(false)
6775            .build()
6776            .await
6777            .unwrap();
6778
6779        let mut declare_req = DeclareTableRequest::new();
6780        declare_req.id = Some(vec!["test_table".to_string()]);
6781        namespace.declare_table(declare_req).await.unwrap();
6782
6783        let mut merge_req = MergeInsertIntoTableRequest::new();
6784        merge_req.id = Some(vec!["test_table".to_string()]);
6785        merge_req.on = Some("id".to_string());
6786        let response = namespace
6787            .merge_insert_into_table(
6788                merge_req,
6789                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6790            )
6791            .await
6792            .unwrap();
6793
6794        assert_eq!(response.num_inserted_rows, Some(2));
6795        assert_eq!(response.num_updated_rows, Some(0));
6796
6797        let mut describe_req = DescribeTableRequest::new();
6798        describe_req.id = Some(vec!["test_table".to_string()]);
6799        describe_req.load_detailed_metadata = Some(true);
6800        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6801        assert_eq!(describe_response.is_only_declared, Some(false));
6802        assert_eq!(describe_response.version, Some(1));
6803
6804        let mut list_req = ListTablesRequest::new();
6805        list_req.id = Some(vec![]);
6806        list_req.include_declared = Some(false);
6807        assert_eq!(
6808            namespace.list_tables(list_req).await.unwrap().tables,
6809            vec!["test_table".to_string()]
6810        );
6811    }
6812
6813    #[tokio::test]
6814    async fn test_declare_table_with_manifest() {
6815        use lance_namespace::models::{
6816            DeclareTableRequest, DescribeTableRequest, ListTablesRequest, TableExistsRequest,
6817        };
6818
6819        let temp_dir = TempStdDir::default();
6820        let temp_path = temp_dir.to_str().unwrap();
6821
6822        // Create namespace with manifest
6823        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6824            .manifest_enabled(true)
6825            .dir_listing_enabled(false)
6826            .build()
6827            .await
6828            .unwrap();
6829
6830        // Declare a table
6831        let mut declare_req = DeclareTableRequest::new();
6832        declare_req.id = Some(vec!["test_table".to_string()]);
6833        declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6834        let response = namespace.declare_table(declare_req).await.unwrap();
6835
6836        // Should return location
6837        assert!(response.location.is_some());
6838        assert_eq!(
6839            response
6840                .properties
6841                .as_ref()
6842                .and_then(|properties| properties.get("owner")),
6843            Some(&"alice".to_string())
6844        );
6845
6846        // Table should exist in manifest
6847        let mut exists_req = TableExistsRequest::new();
6848        exists_req.id = Some(vec!["test_table".to_string()]);
6849        assert!(namespace.table_exists(exists_req).await.is_ok());
6850
6851        let mut describe_req = DescribeTableRequest::new();
6852        describe_req.id = Some(vec!["test_table".to_string()]);
6853        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6854        assert_eq!(describe_response.is_only_declared, None);
6855
6856        let mut describe_req = DescribeTableRequest::new();
6857        describe_req.id = Some(vec!["test_table".to_string()]);
6858        describe_req.check_declared = Some(true);
6859        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6860        assert_eq!(describe_response.is_only_declared, Some(true));
6861        assert_eq!(
6862            describe_response
6863                .properties
6864                .as_ref()
6865                .and_then(|properties| properties.get("owner")),
6866            Some(&"alice".to_string())
6867        );
6868
6869        let mut list_req = ListTablesRequest::new();
6870        list_req.id = Some(vec![]);
6871        assert_eq!(
6872            namespace
6873                .list_tables(list_req.clone())
6874                .await
6875                .unwrap()
6876                .tables,
6877            vec!["test_table".to_string()]
6878        );
6879        list_req.include_declared = Some(false);
6880        assert!(
6881            namespace
6882                .list_tables(list_req)
6883                .await
6884                .unwrap()
6885                .tables
6886                .is_empty()
6887        );
6888    }
6889
6890    #[tokio::test]
6891    async fn test_declare_table_when_table_exists() {
6892        use lance_namespace::models::DeclareTableRequest;
6893
6894        let temp_dir = TempStdDir::default();
6895        let temp_path = temp_dir.to_str().unwrap();
6896
6897        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6898            .manifest_enabled(false)
6899            .build()
6900            .await
6901            .unwrap();
6902
6903        // First create a table with actual data
6904        let schema = create_test_schema();
6905        let ipc_data = create_test_ipc_data(&schema);
6906        let mut create_req = CreateTableRequest::new();
6907        create_req.id = Some(vec!["test_table".to_string()]);
6908        namespace
6909            .create_table(create_req, bytes::Bytes::from(ipc_data))
6910            .await
6911            .unwrap();
6912
6913        // Try to declare the same table - should fail because it already has data
6914        let mut declare_req = DeclareTableRequest::new();
6915        declare_req.id = Some(vec!["test_table".to_string()]);
6916        let result = namespace.declare_table(declare_req).await;
6917        assert!(result.is_err());
6918    }
6919
6920    // ============================================================
6921    // Tests for deregister_table in V1 mode
6922    // ============================================================
6923
6924    #[tokio::test]
6925    async fn test_deregister_table_v1_mode() {
6926        use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
6927
6928        let temp_dir = TempStdDir::default();
6929        let temp_path = temp_dir.to_str().unwrap();
6930
6931        // Create namespace in V1 mode (no manifest, with dir listing)
6932        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6933            .manifest_enabled(false)
6934            .dir_listing_enabled(true)
6935            .build()
6936            .await
6937            .unwrap();
6938
6939        // Create a table with data
6940        let schema = create_test_schema();
6941        let ipc_data = create_test_ipc_data(&schema);
6942        let mut create_req = CreateTableRequest::new();
6943        create_req.id = Some(vec!["test_table".to_string()]);
6944        namespace
6945            .create_table(create_req, bytes::Bytes::from(ipc_data))
6946            .await
6947            .unwrap();
6948
6949        // Verify table exists
6950        let mut exists_req = TableExistsRequest::new();
6951        exists_req.id = Some(vec!["test_table".to_string()]);
6952        assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
6953
6954        // Deregister the table
6955        let mut deregister_req = DeregisterTableRequest::new();
6956        deregister_req.id = Some(vec!["test_table".to_string()]);
6957        let response = namespace.deregister_table(deregister_req).await.unwrap();
6958
6959        // Should return location
6960        assert!(response.location.is_some());
6961        let location = response.location.as_ref().unwrap();
6962        assert!(location.contains("test_table"));
6963
6964        // Table should no longer exist (deregistered)
6965        let result = namespace.table_exists(exists_req).await;
6966        assert!(result.is_err());
6967        assert!(result.unwrap_err().to_string().contains("deregistered"));
6968
6969        // Physical data should still exist
6970        let dataset = Dataset::open(location).await;
6971        assert!(dataset.is_ok(), "Physical table data should still exist");
6972    }
6973
6974    #[tokio::test]
6975    async fn test_deregister_table_v1_already_deregistered() {
6976        use lance_namespace::models::DeregisterTableRequest;
6977
6978        let temp_dir = TempStdDir::default();
6979        let temp_path = temp_dir.to_str().unwrap();
6980
6981        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6982            .manifest_enabled(false)
6983            .dir_listing_enabled(true)
6984            .build()
6985            .await
6986            .unwrap();
6987
6988        // Create a table
6989        let schema = create_test_schema();
6990        let ipc_data = create_test_ipc_data(&schema);
6991        let mut create_req = CreateTableRequest::new();
6992        create_req.id = Some(vec!["test_table".to_string()]);
6993        namespace
6994            .create_table(create_req, bytes::Bytes::from(ipc_data))
6995            .await
6996            .unwrap();
6997
6998        // Deregister once
6999        let mut deregister_req = DeregisterTableRequest::new();
7000        deregister_req.id = Some(vec!["test_table".to_string()]);
7001        namespace
7002            .deregister_table(deregister_req.clone())
7003            .await
7004            .unwrap();
7005
7006        // Try to deregister again - should fail
7007        let result = namespace.deregister_table(deregister_req).await;
7008        assert!(result.is_err());
7009        assert!(
7010            result
7011                .unwrap_err()
7012                .to_string()
7013                .contains("already deregistered")
7014        );
7015    }
7016
7017    // ============================================================
7018    // Tests for list_tables skipping deregistered tables
7019    // ============================================================
7020
7021    #[tokio::test]
7022    async fn test_list_tables_skips_deregistered_v1() {
7023        use lance_namespace::models::DeregisterTableRequest;
7024
7025        let temp_dir = TempStdDir::default();
7026        let temp_path = temp_dir.to_str().unwrap();
7027
7028        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7029            .manifest_enabled(false)
7030            .dir_listing_enabled(true)
7031            .build()
7032            .await
7033            .unwrap();
7034
7035        // Create two tables
7036        let schema = create_test_schema();
7037        let ipc_data = create_test_ipc_data(&schema);
7038
7039        let mut create_req1 = CreateTableRequest::new();
7040        create_req1.id = Some(vec!["table1".to_string()]);
7041        namespace
7042            .create_table(create_req1, bytes::Bytes::from(ipc_data.clone()))
7043            .await
7044            .unwrap();
7045
7046        let mut create_req2 = CreateTableRequest::new();
7047        create_req2.id = Some(vec!["table2".to_string()]);
7048        namespace
7049            .create_table(create_req2, bytes::Bytes::from(ipc_data))
7050            .await
7051            .unwrap();
7052
7053        // List tables - should see both (root namespace = empty vec)
7054        let mut list_req = ListTablesRequest::new();
7055        list_req.id = Some(vec![]);
7056        let list_response = namespace.list_tables(list_req.clone()).await.unwrap();
7057        assert_eq!(list_response.tables.len(), 2);
7058
7059        // Deregister table1
7060        let mut deregister_req = DeregisterTableRequest::new();
7061        deregister_req.id = Some(vec!["table1".to_string()]);
7062        namespace.deregister_table(deregister_req).await.unwrap();
7063
7064        // List tables - should only see table2
7065        let list_response = namespace.list_tables(list_req).await.unwrap();
7066        assert_eq!(list_response.tables.len(), 1);
7067        assert!(list_response.tables.contains(&"table2".to_string()));
7068        assert!(!list_response.tables.contains(&"table1".to_string()));
7069    }
7070
7071    // ============================================================
7072    // Tests for describe_table and table_exists with deregistered tables
7073    // ============================================================
7074
7075    #[tokio::test]
7076    async fn test_describe_table_fails_for_deregistered_v1() {
7077        use lance_namespace::models::{DeregisterTableRequest, DescribeTableRequest};
7078
7079        let temp_dir = TempStdDir::default();
7080        let temp_path = temp_dir.to_str().unwrap();
7081
7082        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7083            .manifest_enabled(false)
7084            .dir_listing_enabled(true)
7085            .build()
7086            .await
7087            .unwrap();
7088
7089        // Create a table
7090        let schema = create_test_schema();
7091        let ipc_data = create_test_ipc_data(&schema);
7092        let mut create_req = CreateTableRequest::new();
7093        create_req.id = Some(vec!["test_table".to_string()]);
7094        namespace
7095            .create_table(create_req, bytes::Bytes::from(ipc_data))
7096            .await
7097            .unwrap();
7098
7099        // Describe should work before deregistration
7100        let mut describe_req = DescribeTableRequest::new();
7101        describe_req.id = Some(vec!["test_table".to_string()]);
7102        assert!(namespace.describe_table(describe_req.clone()).await.is_ok());
7103
7104        // Deregister
7105        let mut deregister_req = DeregisterTableRequest::new();
7106        deregister_req.id = Some(vec!["test_table".to_string()]);
7107        namespace.deregister_table(deregister_req).await.unwrap();
7108
7109        // Describe should fail after deregistration
7110        let result = namespace.describe_table(describe_req).await;
7111        assert!(result.is_err());
7112        let err = result.unwrap_err();
7113        assert!(matches!(err, Error::Namespace { .. }));
7114        let err_msg = err.to_string();
7115        assert!(err_msg.contains("deregistered"));
7116        assert!(err_msg.contains("table id 'test_table'"));
7117    }
7118
7119    #[tokio::test]
7120    async fn test_table_exists_fails_for_deregistered_v1() {
7121        use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
7122
7123        let temp_dir = TempStdDir::default();
7124        let temp_path = temp_dir.to_str().unwrap();
7125
7126        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7127            .manifest_enabled(false)
7128            .dir_listing_enabled(true)
7129            .build()
7130            .await
7131            .unwrap();
7132
7133        // Create a table
7134        let schema = create_test_schema();
7135        let ipc_data = create_test_ipc_data(&schema);
7136        let mut create_req = CreateTableRequest::new();
7137        create_req.id = Some(vec!["test_table".to_string()]);
7138        namespace
7139            .create_table(create_req, bytes::Bytes::from(ipc_data))
7140            .await
7141            .unwrap();
7142
7143        // Table exists should work before deregistration
7144        let mut exists_req = TableExistsRequest::new();
7145        exists_req.id = Some(vec!["test_table".to_string()]);
7146        assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
7147
7148        // Deregister
7149        let mut deregister_req = DeregisterTableRequest::new();
7150        deregister_req.id = Some(vec!["test_table".to_string()]);
7151        namespace.deregister_table(deregister_req).await.unwrap();
7152
7153        // Table exists should fail after deregistration
7154        let result = namespace.table_exists(exists_req).await;
7155        assert!(result.is_err());
7156        let err = result.unwrap_err();
7157        assert!(matches!(err, Error::Namespace { .. }));
7158        let err_msg = err.to_string();
7159        assert!(err_msg.contains("deregistered"));
7160        assert!(err_msg.contains("table id 'test_table'"));
7161    }
7162
7163    #[tokio::test]
7164    async fn test_atomic_table_status_check() {
7165        // This test verifies that the TableStatus check is atomic
7166        // by ensuring a single directory listing is used
7167
7168        let temp_dir = TempStdDir::default();
7169        let temp_path = temp_dir.to_str().unwrap();
7170
7171        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7172            .manifest_enabled(false)
7173            .dir_listing_enabled(true)
7174            .build()
7175            .await
7176            .unwrap();
7177
7178        // Create a table
7179        let schema = create_test_schema();
7180        let ipc_data = create_test_ipc_data(&schema);
7181        let mut create_req = CreateTableRequest::new();
7182        create_req.id = Some(vec!["test_table".to_string()]);
7183        namespace
7184            .create_table(create_req, bytes::Bytes::from(ipc_data))
7185            .await
7186            .unwrap();
7187
7188        // Table status should show exists=true, is_deregistered=false
7189        let status = namespace.check_table_status("test_table").await;
7190        assert!(status.exists);
7191        assert!(!status.is_deregistered);
7192        assert!(!status.has_reserved_file);
7193    }
7194
7195    #[tokio::test]
7196    async fn test_table_version_tracking_enabled_managed_versioning() {
7197        use lance_namespace::models::DescribeTableRequest;
7198
7199        let temp_dir = TempStdDir::default();
7200        let temp_path = temp_dir.to_str().unwrap();
7201
7202        // Create namespace with table_version_tracking_enabled=true
7203        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7204            .table_version_tracking_enabled(true)
7205            .build()
7206            .await
7207            .unwrap();
7208
7209        // Create a table
7210        let schema = create_test_schema();
7211        let ipc_data = create_test_ipc_data(&schema);
7212        let mut create_req = CreateTableRequest::new();
7213        create_req.id = Some(vec!["test_table".to_string()]);
7214        namespace
7215            .create_table(create_req, bytes::Bytes::from(ipc_data))
7216            .await
7217            .unwrap();
7218
7219        // Describe table should return managed_versioning=true
7220        let mut describe_req = DescribeTableRequest::new();
7221        describe_req.id = Some(vec!["test_table".to_string()]);
7222        let describe_resp = namespace.describe_table(describe_req).await.unwrap();
7223
7224        // managed_versioning should be true
7225        assert_eq!(
7226            describe_resp.managed_versioning,
7227            Some(true),
7228            "managed_versioning should be true when table_version_tracking_enabled=true"
7229        );
7230    }
7231
7232    #[tokio::test]
7233    async fn test_table_version_tracking_disabled_no_managed_versioning() {
7234        use lance_namespace::models::DescribeTableRequest;
7235
7236        let temp_dir = TempStdDir::default();
7237        let temp_path = temp_dir.to_str().unwrap();
7238
7239        // Create namespace with table_version_tracking_enabled=false (default)
7240        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7241            .table_version_tracking_enabled(false)
7242            .build()
7243            .await
7244            .unwrap();
7245
7246        // Create a table
7247        let schema = create_test_schema();
7248        let ipc_data = create_test_ipc_data(&schema);
7249        let mut create_req = CreateTableRequest::new();
7250        create_req.id = Some(vec!["test_table".to_string()]);
7251        namespace
7252            .create_table(create_req, bytes::Bytes::from(ipc_data))
7253            .await
7254            .unwrap();
7255
7256        // Describe table should not have managed_versioning set
7257        let mut describe_req = DescribeTableRequest::new();
7258        describe_req.id = Some(vec!["test_table".to_string()]);
7259        let describe_resp = namespace.describe_table(describe_req).await.unwrap();
7260
7261        // managed_versioning should be None when table_version_tracking_enabled=false
7262        assert!(
7263            describe_resp.managed_versioning.is_none(),
7264            "managed_versioning should be None when table_version_tracking_enabled=false, got: {:?}",
7265            describe_resp.managed_versioning
7266        );
7267    }
7268
7269    #[tokio::test]
7270    async fn test_list_table_versions() {
7271        use arrow::array::{Int32Array, RecordBatchIterator};
7272        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7273        use arrow::record_batch::RecordBatch;
7274        use lance::dataset::{Dataset, WriteMode, WriteParams};
7275        use lance_namespace::models::{CreateNamespaceRequest, ListTableVersionsRequest};
7276
7277        let temp_dir = TempStrDir::default();
7278        let temp_path: &str = &temp_dir;
7279
7280        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7281            DirectoryNamespaceBuilder::new(temp_path)
7282                .table_version_tracking_enabled(true)
7283                .build()
7284                .await
7285                .unwrap(),
7286        );
7287
7288        // Create parent namespace first
7289        let mut create_ns_req = CreateNamespaceRequest::new();
7290        create_ns_req.id = Some(vec!["workspace".to_string()]);
7291        namespace.create_namespace(create_ns_req).await.unwrap();
7292
7293        // Create a table using write_into_namespace (version 1)
7294        let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7295        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7296            "id",
7297            DataType::Int32,
7298            false,
7299        )]));
7300        let batch = RecordBatch::try_new(
7301            arrow_schema.clone(),
7302            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7303        )
7304        .unwrap();
7305        let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone());
7306        let write_params = WriteParams {
7307            mode: WriteMode::Create,
7308            ..Default::default()
7309        };
7310        let mut dataset = Dataset::write_into_namespace(
7311            batches,
7312            namespace.clone(),
7313            table_id.clone(),
7314            Some(write_params),
7315        )
7316        .await
7317        .unwrap();
7318
7319        // Append to create version 2
7320        let batch2 = RecordBatch::try_new(
7321            arrow_schema.clone(),
7322            vec![Arc::new(Int32Array::from(vec![100, 200]))],
7323        )
7324        .unwrap();
7325        let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone());
7326        dataset.append(batches, None).await.unwrap();
7327
7328        // Append to create version 3
7329        let batch3 = RecordBatch::try_new(
7330            arrow_schema.clone(),
7331            vec![Arc::new(Int32Array::from(vec![300, 400]))],
7332        )
7333        .unwrap();
7334        let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema);
7335        dataset.append(batches, None).await.unwrap();
7336
7337        // List versions - should have versions 1, 2, and 3
7338        let mut list_req = ListTableVersionsRequest::new();
7339        list_req.id = Some(table_id.clone());
7340        let list_resp = namespace.list_table_versions(list_req).await.unwrap();
7341
7342        assert_eq!(
7343            list_resp.versions.len(),
7344            3,
7345            "Should have 3 versions, got: {:?}",
7346            list_resp.versions
7347        );
7348
7349        // Verify each version
7350        for expected_version in 1..=3 {
7351            let version = list_resp
7352                .versions
7353                .iter()
7354                .find(|v| v.version == expected_version)
7355                .unwrap_or_else(|| panic!("Expected version {}", expected_version));
7356
7357            assert!(
7358                !version.manifest_path.is_empty(),
7359                "manifest_path should be set for version {}",
7360                expected_version
7361            );
7362            assert!(
7363                version.manifest_path.contains(".manifest"),
7364                "manifest_path should contain .manifest for version {}",
7365                expected_version
7366            );
7367            assert!(
7368                version.manifest_size.is_some(),
7369                "manifest_size should be set for version {}",
7370                expected_version
7371            );
7372            assert!(
7373                version.manifest_size.unwrap() > 0,
7374                "manifest_size should be > 0 for version {}",
7375                expected_version
7376            );
7377            assert!(
7378                version.timestamp_millis.is_some(),
7379                "timestamp_millis should be set for version {}",
7380                expected_version
7381            );
7382        }
7383    }
7384
7385    #[tokio::test]
7386    async fn test_describe_table_version() {
7387        use arrow::array::{Int32Array, RecordBatchIterator};
7388        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7389        use arrow::record_batch::RecordBatch;
7390        use lance::dataset::{Dataset, WriteMode, WriteParams};
7391        use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest};
7392
7393        let temp_dir = TempStrDir::default();
7394        let temp_path: &str = &temp_dir;
7395
7396        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7397            DirectoryNamespaceBuilder::new(temp_path)
7398                .table_version_tracking_enabled(true)
7399                .build()
7400                .await
7401                .unwrap(),
7402        );
7403
7404        // Create parent namespace first
7405        let mut create_ns_req = CreateNamespaceRequest::new();
7406        create_ns_req.id = Some(vec!["workspace".to_string()]);
7407        namespace.create_namespace(create_ns_req).await.unwrap();
7408
7409        // Create a table using write_into_namespace (version 1)
7410        let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7411        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7412            "id",
7413            DataType::Int32,
7414            false,
7415        )]));
7416        let batch = RecordBatch::try_new(
7417            arrow_schema.clone(),
7418            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7419        )
7420        .unwrap();
7421        let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
7422        let write_params = WriteParams {
7423            mode: WriteMode::Create,
7424            ..Default::default()
7425        };
7426        let mut dataset = Dataset::write_into_namespace(
7427            batches,
7428            namespace.clone(),
7429            table_id.clone(),
7430            Some(write_params),
7431        )
7432        .await
7433        .unwrap();
7434
7435        // Append data to create version 2
7436        let batch2 = RecordBatch::try_new(
7437            arrow_schema.clone(),
7438            vec![Arc::new(Int32Array::from(vec![100, 200]))],
7439        )
7440        .unwrap();
7441        let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema);
7442        dataset.append(batches, None).await.unwrap();
7443
7444        // Describe version 1
7445        let mut describe_req = DescribeTableVersionRequest::new();
7446        describe_req.id = Some(table_id.clone());
7447        describe_req.version = Some(1);
7448        let describe_resp = namespace
7449            .describe_table_version(describe_req)
7450            .await
7451            .unwrap();
7452
7453        let version = &describe_resp.version;
7454        assert_eq!(version.version, 1);
7455        assert!(version.timestamp_millis.is_some());
7456        assert!(
7457            !version.manifest_path.is_empty(),
7458            "manifest_path should be set"
7459        );
7460        assert!(
7461            version.manifest_path.contains(".manifest"),
7462            "manifest_path should contain .manifest"
7463        );
7464        assert!(
7465            version.manifest_size.is_some(),
7466            "manifest_size should be set"
7467        );
7468        assert!(
7469            version.manifest_size.unwrap() > 0,
7470            "manifest_size should be > 0"
7471        );
7472
7473        // Describe version 2
7474        let mut describe_req = DescribeTableVersionRequest::new();
7475        describe_req.id = Some(table_id.clone());
7476        describe_req.version = Some(2);
7477        let describe_resp = namespace
7478            .describe_table_version(describe_req)
7479            .await
7480            .unwrap();
7481
7482        let version = &describe_resp.version;
7483        assert_eq!(version.version, 2);
7484        assert!(version.timestamp_millis.is_some());
7485        assert!(
7486            !version.manifest_path.is_empty(),
7487            "manifest_path should be set"
7488        );
7489        assert!(
7490            version.manifest_size.is_some(),
7491            "manifest_size should be set"
7492        );
7493        assert!(
7494            version.manifest_size.unwrap() > 0,
7495            "manifest_size should be > 0"
7496        );
7497    }
7498
7499    #[tokio::test]
7500    async fn test_describe_table_version_latest() {
7501        use arrow::array::{Int32Array, RecordBatchIterator};
7502        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7503        use arrow::record_batch::RecordBatch;
7504        use lance::dataset::{Dataset, WriteMode, WriteParams};
7505        use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest};
7506
7507        let temp_dir = TempStrDir::default();
7508        let temp_path: &str = &temp_dir;
7509
7510        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7511            DirectoryNamespaceBuilder::new(temp_path)
7512                .table_version_tracking_enabled(true)
7513                .build()
7514                .await
7515                .unwrap(),
7516        );
7517
7518        // Create parent namespace first
7519        let mut create_ns_req = CreateNamespaceRequest::new();
7520        create_ns_req.id = Some(vec!["workspace".to_string()]);
7521        namespace.create_namespace(create_ns_req).await.unwrap();
7522
7523        // Create a table using write_into_namespace (version 1)
7524        let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7525        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7526            "id",
7527            DataType::Int32,
7528            false,
7529        )]));
7530        let batch = RecordBatch::try_new(
7531            arrow_schema.clone(),
7532            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7533        )
7534        .unwrap();
7535        let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
7536        let write_params = WriteParams {
7537            mode: WriteMode::Create,
7538            ..Default::default()
7539        };
7540        let mut dataset = Dataset::write_into_namespace(
7541            batches,
7542            namespace.clone(),
7543            table_id.clone(),
7544            Some(write_params),
7545        )
7546        .await
7547        .unwrap();
7548
7549        // Append to create version 2
7550        let batch2 = RecordBatch::try_new(
7551            arrow_schema.clone(),
7552            vec![Arc::new(Int32Array::from(vec![100, 200]))],
7553        )
7554        .unwrap();
7555        let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone());
7556        dataset.append(batches, None).await.unwrap();
7557
7558        // Append to create version 3
7559        let batch3 = RecordBatch::try_new(
7560            arrow_schema.clone(),
7561            vec![Arc::new(Int32Array::from(vec![300, 400]))],
7562        )
7563        .unwrap();
7564        let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema);
7565        dataset.append(batches, None).await.unwrap();
7566
7567        // Describe latest version (no version specified)
7568        let mut describe_req = DescribeTableVersionRequest::new();
7569        describe_req.id = Some(table_id.clone());
7570        describe_req.version = None;
7571        let describe_resp = namespace
7572            .describe_table_version(describe_req)
7573            .await
7574            .unwrap();
7575
7576        // Should return version 3 as it's the latest
7577        assert_eq!(describe_resp.version.version, 3);
7578    }
7579
7580    #[tokio::test]
7581    async fn test_create_table_version() {
7582        use futures::TryStreamExt;
7583        use lance::dataset::builder::DatasetBuilder;
7584        use lance_namespace::models::CreateTableVersionRequest;
7585
7586        let temp_dir = TempStrDir::default();
7587        let temp_path: &str = &temp_dir;
7588
7589        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7590            DirectoryNamespaceBuilder::new(temp_path)
7591                .table_version_tracking_enabled(true)
7592                .build()
7593                .await
7594                .unwrap(),
7595        );
7596
7597        // Create a table
7598        let schema = create_test_schema();
7599        let ipc_data = create_test_ipc_data(&schema);
7600        let mut create_req = CreateTableRequest::new();
7601        create_req.id = Some(vec!["test_table".to_string()]);
7602        namespace
7603            .create_table(create_req, bytes::Bytes::from(ipc_data))
7604            .await
7605            .unwrap();
7606
7607        // Open the dataset using from_namespace to get proper object_store and paths
7608        let table_id = vec!["test_table".to_string()];
7609        let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
7610            .await
7611            .unwrap()
7612            .load()
7613            .await
7614            .unwrap();
7615
7616        // Use dataset's object_store to find and copy the manifest
7617        let versions_path = dataset.versions_dir();
7618        let manifest_metas: Vec<_> = dataset
7619            .object_store(None)
7620            .await
7621            .unwrap()
7622            .inner
7623            .list(Some(&versions_path))
7624            .try_collect()
7625            .await
7626            .unwrap();
7627
7628        let manifest_meta = manifest_metas
7629            .iter()
7630            .find(|m| {
7631                m.location
7632                    .filename()
7633                    .map(|f| f.ends_with(".manifest"))
7634                    .unwrap_or(false)
7635            })
7636            .expect("No manifest file found");
7637
7638        // Read the existing manifest data
7639        let manifest_data = dataset
7640            .object_store(None)
7641            .await
7642            .unwrap()
7643            .inner
7644            .get(&manifest_meta.location)
7645            .await
7646            .unwrap()
7647            .bytes()
7648            .await
7649            .unwrap();
7650
7651        // Write to a staging location using the dataset's object_store
7652        let staging_path = dataset.versions_dir().join("staging_manifest");
7653        dataset
7654            .object_store(None)
7655            .await
7656            .unwrap()
7657            .inner
7658            .put(&staging_path, manifest_data.into())
7659            .await
7660            .unwrap();
7661
7662        // Create version 2 from staging manifest
7663        // Use the same naming scheme as the existing dataset (V2)
7664        let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7665        create_version_req.id = Some(table_id.clone());
7666        create_version_req.naming_scheme = Some("V2".to_string());
7667
7668        let result = namespace.create_table_version(create_version_req).await;
7669        assert!(
7670            result.is_ok(),
7671            "create_table_version should succeed: {:?}",
7672            result
7673        );
7674
7675        // Verify version 2 was created at the path returned in the response
7676        let response = result.unwrap();
7677        let version_info = response
7678            .version
7679            .expect("response should contain version info");
7680        let version_2_path = Path::parse(&version_info.manifest_path).unwrap();
7681        let head_result = dataset
7682            .object_store(None)
7683            .await
7684            .unwrap()
7685            .inner
7686            .head(&version_2_path)
7687            .await;
7688        assert!(
7689            head_result.is_ok(),
7690            "Version 2 manifest should exist at {}",
7691            version_2_path
7692        );
7693
7694        // Verify the staging file has been deleted
7695        let staging_head_result = dataset
7696            .object_store(None)
7697            .await
7698            .unwrap()
7699            .inner
7700            .head(&staging_path)
7701            .await;
7702        assert!(
7703            staging_head_result.is_err(),
7704            "Staging manifest should have been deleted after create_table_version"
7705        );
7706    }
7707
7708    #[tokio::test]
7709    async fn test_create_table_version_conflict() {
7710        // create_table_version should fail if the version already exists.
7711        // Each version always writes to a new file location.
7712        use futures::TryStreamExt;
7713        use lance::dataset::builder::DatasetBuilder;
7714        use lance_namespace::models::CreateTableVersionRequest;
7715
7716        let temp_dir = TempStrDir::default();
7717        let temp_path: &str = &temp_dir;
7718
7719        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7720            DirectoryNamespaceBuilder::new(temp_path)
7721                .table_version_tracking_enabled(true)
7722                .build()
7723                .await
7724                .unwrap(),
7725        );
7726
7727        // Create a table
7728        let schema = create_test_schema();
7729        let ipc_data = create_test_ipc_data(&schema);
7730        let mut create_req = CreateTableRequest::new();
7731        create_req.id = Some(vec!["test_table".to_string()]);
7732        namespace
7733            .create_table(create_req, bytes::Bytes::from(ipc_data))
7734            .await
7735            .unwrap();
7736
7737        // Open the dataset using from_namespace to get proper object_store and paths
7738        let table_id = vec!["test_table".to_string()];
7739        let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
7740            .await
7741            .unwrap()
7742            .load()
7743            .await
7744            .unwrap();
7745
7746        // Use dataset's object_store to find and copy the manifest
7747        let versions_path = dataset.versions_dir();
7748        let manifest_metas: Vec<_> = dataset
7749            .object_store(None)
7750            .await
7751            .unwrap()
7752            .inner
7753            .list(Some(&versions_path))
7754            .try_collect()
7755            .await
7756            .unwrap();
7757
7758        let manifest_meta = manifest_metas
7759            .iter()
7760            .find(|m| {
7761                m.location
7762                    .filename()
7763                    .map(|f| f.ends_with(".manifest"))
7764                    .unwrap_or(false)
7765            })
7766            .expect("No manifest file found");
7767
7768        // Read the existing manifest data
7769        let manifest_data = dataset
7770            .object_store(None)
7771            .await
7772            .unwrap()
7773            .inner
7774            .get(&manifest_meta.location)
7775            .await
7776            .unwrap()
7777            .bytes()
7778            .await
7779            .unwrap();
7780
7781        // Write to a staging location using the dataset's object_store
7782        let staging_path = dataset.versions_dir().join("staging_manifest");
7783        dataset
7784            .object_store(None)
7785            .await
7786            .unwrap()
7787            .inner
7788            .put(&staging_path, manifest_data.into())
7789            .await
7790            .unwrap();
7791
7792        // First create version 2 (should succeed)
7793        let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7794        create_version_req.id = Some(table_id.clone());
7795        create_version_req.naming_scheme = Some("V2".to_string());
7796        let first_result = namespace.create_table_version(create_version_req).await;
7797        assert!(
7798            first_result.is_ok(),
7799            "First create_table_version for version 2 should succeed: {:?}",
7800            first_result
7801        );
7802
7803        // Get the path from the response for verification
7804        let version_2_path = Path::parse(
7805            &first_result
7806                .unwrap()
7807                .version
7808                .expect("response should contain version info")
7809                .manifest_path,
7810        )
7811        .unwrap();
7812
7813        // Create version 2 again (should fail - conflict)
7814        let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7815        create_version_req.id = Some(table_id.clone());
7816        create_version_req.naming_scheme = Some("V2".to_string());
7817
7818        let result = namespace.create_table_version(create_version_req).await;
7819        assert!(
7820            result.is_err(),
7821            "create_table_version should fail for existing version"
7822        );
7823
7824        // Verify version 2 still exists using the dataset's object_store
7825        let head_result = dataset
7826            .object_store(None)
7827            .await
7828            .unwrap()
7829            .inner
7830            .head(&version_2_path)
7831            .await;
7832        assert!(
7833            head_result.is_ok(),
7834            "Version 2 manifest should still exist at {}",
7835            version_2_path
7836        );
7837    }
7838
7839    #[tokio::test]
7840    async fn test_create_table_version_table_not_found() {
7841        use lance_namespace::models::CreateTableVersionRequest;
7842
7843        let temp_dir = TempStdDir::default();
7844        let temp_path = temp_dir.to_str().unwrap();
7845
7846        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7847            .table_version_tracking_enabled(true)
7848            .build()
7849            .await
7850            .unwrap();
7851
7852        // Try to create version for non-existent table
7853        let mut create_version_req =
7854            CreateTableVersionRequest::new(1, "/some/staging/path".to_string());
7855        create_version_req.id = Some(vec!["non_existent_table".to_string()]);
7856
7857        let result = namespace.create_table_version(create_version_req).await;
7858        assert!(
7859            result.is_err(),
7860            "create_table_version should fail for non-existent table"
7861        );
7862        let err_msg = result.unwrap_err().to_string();
7863        assert!(
7864            err_msg.contains("Table not found"),
7865            "Error should mention table not found, got: {}",
7866            err_msg
7867        );
7868    }
7869
7870    /// End-to-end integration test module for table version tracking.
7871    mod e2e_table_version_tracking {
7872        use super::*;
7873        use std::sync::atomic::{AtomicUsize, Ordering};
7874
7875        /// Tracking wrapper around a namespace that counts method invocations.
7876        struct TrackingNamespace {
7877            inner: DirectoryNamespace,
7878            create_table_version_count: AtomicUsize,
7879            describe_table_version_count: AtomicUsize,
7880            list_table_versions_count: AtomicUsize,
7881        }
7882
7883        impl TrackingNamespace {
7884            fn new(inner: DirectoryNamespace) -> Self {
7885                Self {
7886                    inner,
7887                    create_table_version_count: AtomicUsize::new(0),
7888                    describe_table_version_count: AtomicUsize::new(0),
7889                    list_table_versions_count: AtomicUsize::new(0),
7890                }
7891            }
7892
7893            fn create_table_version_calls(&self) -> usize {
7894                self.create_table_version_count.load(Ordering::SeqCst)
7895            }
7896
7897            fn describe_table_version_calls(&self) -> usize {
7898                self.describe_table_version_count.load(Ordering::SeqCst)
7899            }
7900
7901            fn list_table_versions_calls(&self) -> usize {
7902                self.list_table_versions_count.load(Ordering::SeqCst)
7903            }
7904        }
7905
7906        impl std::fmt::Debug for TrackingNamespace {
7907            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7908                f.debug_struct("TrackingNamespace")
7909                    .field(
7910                        "create_table_version_calls",
7911                        &self.create_table_version_calls(),
7912                    )
7913                    .finish()
7914            }
7915        }
7916
7917        #[async_trait]
7918        impl LanceNamespace for TrackingNamespace {
7919            async fn create_namespace(
7920                &self,
7921                request: CreateNamespaceRequest,
7922            ) -> Result<CreateNamespaceResponse> {
7923                self.inner.create_namespace(request).await
7924            }
7925
7926            async fn describe_namespace(
7927                &self,
7928                request: DescribeNamespaceRequest,
7929            ) -> Result<DescribeNamespaceResponse> {
7930                self.inner.describe_namespace(request).await
7931            }
7932
7933            async fn namespace_exists(&self, request: NamespaceExistsRequest) -> Result<()> {
7934                self.inner.namespace_exists(request).await
7935            }
7936
7937            async fn list_namespaces(
7938                &self,
7939                request: ListNamespacesRequest,
7940            ) -> Result<ListNamespacesResponse> {
7941                self.inner.list_namespaces(request).await
7942            }
7943
7944            async fn drop_namespace(
7945                &self,
7946                request: DropNamespaceRequest,
7947            ) -> Result<DropNamespaceResponse> {
7948                self.inner.drop_namespace(request).await
7949            }
7950
7951            async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
7952                self.inner.list_tables(request).await
7953            }
7954
7955            async fn describe_table(
7956                &self,
7957                request: DescribeTableRequest,
7958            ) -> Result<DescribeTableResponse> {
7959                self.inner.describe_table(request).await
7960            }
7961
7962            async fn table_exists(&self, request: TableExistsRequest) -> Result<()> {
7963                self.inner.table_exists(request).await
7964            }
7965
7966            async fn drop_table(&self, request: DropTableRequest) -> Result<DropTableResponse> {
7967                self.inner.drop_table(request).await
7968            }
7969
7970            async fn create_table(
7971                &self,
7972                request: CreateTableRequest,
7973                request_data: Bytes,
7974            ) -> Result<CreateTableResponse> {
7975                self.inner.create_table(request, request_data).await
7976            }
7977
7978            async fn declare_table(
7979                &self,
7980                request: DeclareTableRequest,
7981            ) -> Result<DeclareTableResponse> {
7982                self.inner.declare_table(request).await
7983            }
7984
7985            async fn list_table_versions(
7986                &self,
7987                request: ListTableVersionsRequest,
7988            ) -> Result<ListTableVersionsResponse> {
7989                self.list_table_versions_count
7990                    .fetch_add(1, Ordering::SeqCst);
7991                self.inner.list_table_versions(request).await
7992            }
7993
7994            async fn create_table_version(
7995                &self,
7996                request: CreateTableVersionRequest,
7997            ) -> Result<CreateTableVersionResponse> {
7998                self.create_table_version_count
7999                    .fetch_add(1, Ordering::SeqCst);
8000                self.inner.create_table_version(request).await
8001            }
8002
8003            async fn describe_table_version(
8004                &self,
8005                request: DescribeTableVersionRequest,
8006            ) -> Result<DescribeTableVersionResponse> {
8007                self.describe_table_version_count
8008                    .fetch_add(1, Ordering::SeqCst);
8009                self.inner.describe_table_version(request).await
8010            }
8011
8012            async fn batch_delete_table_versions(
8013                &self,
8014                request: BatchDeleteTableVersionsRequest,
8015            ) -> Result<BatchDeleteTableVersionsResponse> {
8016                self.inner.batch_delete_table_versions(request).await
8017            }
8018
8019            fn namespace_id(&self) -> String {
8020                self.inner.namespace_id()
8021            }
8022        }
8023
8024        #[tokio::test]
8025        async fn test_describe_table_returns_managed_versioning() {
8026            use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest};
8027
8028            let temp_dir = TempStdDir::default();
8029            let temp_path = temp_dir.to_str().unwrap();
8030
8031            // Create namespace with table_version_tracking_enabled and manifest_enabled
8032            let ns = DirectoryNamespaceBuilder::new(temp_path)
8033                .table_version_tracking_enabled(true)
8034                .manifest_enabled(true)
8035                .build()
8036                .await
8037                .unwrap();
8038
8039            // Create parent namespace
8040            let mut create_ns_req = CreateNamespaceRequest::new();
8041            create_ns_req.id = Some(vec!["workspace".to_string()]);
8042            ns.create_namespace(create_ns_req).await.unwrap();
8043
8044            // Create a table with multi-level ID (namespace + table)
8045            let schema = create_test_schema();
8046            let ipc_data = create_test_ipc_data(&schema);
8047            let mut create_req = CreateTableRequest::new();
8048            create_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]);
8049            ns.create_table(create_req, bytes::Bytes::from(ipc_data))
8050                .await
8051                .unwrap();
8052
8053            // Describe table should return managed_versioning=true
8054            let mut describe_req = DescribeTableRequest::new();
8055            describe_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]);
8056            let describe_resp = ns.describe_table(describe_req).await.unwrap();
8057
8058            // managed_versioning should be true
8059            assert_eq!(
8060                describe_resp.managed_versioning,
8061                Some(true),
8062                "managed_versioning should be true when table_version_tracking_enabled=true"
8063            );
8064        }
8065
8066        #[tokio::test]
8067        async fn test_external_manifest_store_invokes_namespace_apis() {
8068            use arrow::array::{Int32Array, StringArray};
8069            use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
8070            use arrow::record_batch::RecordBatch;
8071            use lance::Dataset;
8072            use lance::dataset::builder::DatasetBuilder;
8073            use lance::dataset::{WriteMode, WriteParams};
8074            use lance_namespace::models::CreateNamespaceRequest;
8075
8076            let temp_dir = TempStdDir::default();
8077            let temp_path = temp_dir.to_str().unwrap();
8078
8079            // Create namespace with table_version_tracking_enabled and manifest_enabled
8080            let inner_ns = DirectoryNamespaceBuilder::new(temp_path)
8081                .table_version_tracking_enabled(true)
8082                .manifest_enabled(true)
8083                .build()
8084                .await
8085                .unwrap();
8086
8087            let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns));
8088            let ns: Arc<dyn LanceNamespace> = tracking_ns.clone();
8089
8090            // Create parent namespace
8091            let mut create_ns_req = CreateNamespaceRequest::new();
8092            create_ns_req.id = Some(vec!["workspace".to_string()]);
8093            ns.create_namespace(create_ns_req).await.unwrap();
8094
8095            // Create a table with multi-level ID (namespace + table)
8096            let table_id = vec!["workspace".to_string(), "test_table".to_string()];
8097
8098            // Create some initial data
8099            let arrow_schema = Arc::new(ArrowSchema::new(vec![
8100                Field::new("id", DataType::Int32, false),
8101                Field::new("name", DataType::Utf8, true),
8102            ]));
8103            let batch = RecordBatch::try_new(
8104                arrow_schema.clone(),
8105                vec![
8106                    Arc::new(Int32Array::from(vec![1, 2, 3])),
8107                    Arc::new(StringArray::from(vec!["a", "b", "c"])),
8108                ],
8109            )
8110            .unwrap();
8111
8112            // Create a table using write_into_namespace
8113            let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone());
8114            let write_params = WriteParams {
8115                mode: WriteMode::Create,
8116                ..Default::default()
8117            };
8118            let mut dataset = Dataset::write_into_namespace(
8119                batches,
8120                ns.clone(),
8121                table_id.clone(),
8122                Some(write_params),
8123            )
8124            .await
8125            .unwrap();
8126            assert_eq!(dataset.version().version, 1);
8127
8128            // Verify create_table_version was called once during initial write_into_namespace
8129            assert_eq!(
8130                tracking_ns.create_table_version_calls(),
8131                1,
8132                "create_table_version should have been called once during initial write_into_namespace"
8133            );
8134
8135            // Append data - this should call create_table_version again
8136            let append_batch = RecordBatch::try_new(
8137                arrow_schema.clone(),
8138                vec![
8139                    Arc::new(Int32Array::from(vec![4, 5, 6])),
8140                    Arc::new(StringArray::from(vec!["d", "e", "f"])),
8141                ],
8142            )
8143            .unwrap();
8144            let append_batches = RecordBatchIterator::new(vec![Ok(append_batch)], arrow_schema);
8145            dataset.append(append_batches, None).await.unwrap();
8146
8147            assert_eq!(
8148                tracking_ns.create_table_version_calls(),
8149                2,
8150                "create_table_version should have been called twice (once for create, once for append)"
8151            );
8152
8153            // checkout_latest should call list_table_versions exactly once
8154            let initial_list_calls = tracking_ns.list_table_versions_calls();
8155            let latest_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone())
8156                .await
8157                .unwrap()
8158                .load()
8159                .await
8160                .unwrap();
8161            assert_eq!(latest_dataset.version().version, 2);
8162            assert_eq!(
8163                tracking_ns.list_table_versions_calls(),
8164                initial_list_calls + 1,
8165                "list_table_versions should have been called exactly once during checkout_latest"
8166            );
8167
8168            // checkout to specific version should call describe_table_version exactly once
8169            let initial_describe_calls = tracking_ns.describe_table_version_calls();
8170            let v1_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone())
8171                .await
8172                .unwrap()
8173                .with_version(1)
8174                .load()
8175                .await
8176                .unwrap();
8177            assert_eq!(v1_dataset.version().version, 1);
8178            assert_eq!(
8179                tracking_ns.describe_table_version_calls(),
8180                initial_describe_calls + 1,
8181                "describe_table_version should have been called exactly once during checkout to version 1"
8182            );
8183        }
8184
8185        #[tokio::test]
8186        async fn test_dataset_commit_with_external_manifest_store() {
8187            use arrow::array::{Int32Array, StringArray};
8188            use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
8189            use arrow::record_batch::RecordBatch;
8190            use futures::TryStreamExt;
8191            use lance::dataset::{Dataset, WriteMode, WriteParams};
8192            use lance_namespace::models::CreateNamespaceRequest;
8193            use lance_table::io::commit::ManifestNamingScheme;
8194
8195            let temp_dir = TempStdDir::default();
8196            let temp_path = temp_dir.to_str().unwrap();
8197
8198            // Create namespace with table_version_tracking_enabled and manifest_enabled
8199            let inner_ns = DirectoryNamespaceBuilder::new(temp_path)
8200                .table_version_tracking_enabled(true)
8201                .manifest_enabled(true)
8202                .build()
8203                .await
8204                .unwrap();
8205
8206            let tracking_ns: Arc<dyn LanceNamespace> = Arc::new(TrackingNamespace::new(inner_ns));
8207
8208            // Create parent namespace
8209            let mut create_ns_req = CreateNamespaceRequest::new();
8210            create_ns_req.id = Some(vec!["workspace".to_string()]);
8211            tracking_ns.create_namespace(create_ns_req).await.unwrap();
8212
8213            // Create a table using write_into_namespace
8214            let table_id = vec!["workspace".to_string(), "test_table".to_string()];
8215            let arrow_schema = Arc::new(ArrowSchema::new(vec![
8216                Field::new("id", DataType::Int32, false),
8217                Field::new("name", DataType::Utf8, true),
8218            ]));
8219            let batch = RecordBatch::try_new(
8220                arrow_schema.clone(),
8221                vec![
8222                    Arc::new(Int32Array::from(vec![1, 2, 3])),
8223                    Arc::new(StringArray::from(vec!["a", "b", "c"])),
8224                ],
8225            )
8226            .unwrap();
8227            let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
8228            let write_params = WriteParams {
8229                mode: WriteMode::Create,
8230                ..Default::default()
8231            };
8232            let dataset = Dataset::write_into_namespace(
8233                batches,
8234                tracking_ns.clone(),
8235                table_id.clone(),
8236                Some(write_params),
8237            )
8238            .await
8239            .unwrap();
8240            assert_eq!(dataset.version().version, 1);
8241
8242            // Append data using write_into_namespace (APPEND mode)
8243            let batch2 = RecordBatch::try_new(
8244                arrow_schema.clone(),
8245                vec![
8246                    Arc::new(Int32Array::from(vec![4, 5, 6])),
8247                    Arc::new(StringArray::from(vec!["d", "e", "f"])),
8248                ],
8249            )
8250            .unwrap();
8251            let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema);
8252            let write_params = WriteParams {
8253                mode: WriteMode::Append,
8254                ..Default::default()
8255            };
8256            Dataset::write_into_namespace(
8257                batches,
8258                tracking_ns.clone(),
8259                table_id.clone(),
8260                Some(write_params),
8261            )
8262            .await
8263            .unwrap();
8264
8265            // Verify version 2 was created using the dataset's object_store
8266            // List manifests in the versions directory to find the V2 named manifest
8267            let manifest_metas: Vec<_> = dataset
8268                .object_store(None)
8269                .await
8270                .unwrap()
8271                .inner
8272                .list(Some(&dataset.versions_dir()))
8273                .try_collect()
8274                .await
8275                .unwrap();
8276            let version_2_found = manifest_metas.iter().any(|m| {
8277                m.location
8278                    .filename()
8279                    .map(|f| {
8280                        f.ends_with(".manifest")
8281                            && ManifestNamingScheme::V2.parse_version(f) == Some(2)
8282                    })
8283                    .unwrap_or(false)
8284            });
8285            assert!(
8286                version_2_found,
8287                "Version 2 manifest should exist in versions directory"
8288            );
8289        }
8290
8291        /// Helper: create a namespace and a table with some rows, returning (namespace, table_id)
8292        async fn create_ns_with_table() -> (DirectoryNamespace, TempStdDir, Vec<String>) {
8293            use arrow::array::{Int32Array, StringArray};
8294            use arrow::ipc::writer::StreamWriter;
8295
8296            let (namespace, temp_dir) = create_test_namespace().await;
8297
8298            let schema = create_test_schema();
8299            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8300            let arrow_schema = Arc::new(arrow_schema);
8301
8302            let id_array = Int32Array::from(vec![1, 2, 3]);
8303            let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
8304            let batch = arrow::record_batch::RecordBatch::try_new(
8305                arrow_schema.clone(),
8306                vec![Arc::new(id_array), Arc::new(name_array)],
8307            )
8308            .unwrap();
8309
8310            let mut buffer = Vec::new();
8311            {
8312                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8313                writer.write(&batch).unwrap();
8314                writer.finish().unwrap();
8315            }
8316
8317            let mut request = CreateTableRequest::new();
8318            let table_id = vec!["test_ops_table".to_string()];
8319            request.id = Some(table_id.clone());
8320
8321            namespace
8322                .create_table(request, Bytes::from(buffer))
8323                .await
8324                .unwrap();
8325
8326            (namespace, temp_dir, table_id)
8327        }
8328
8329        #[tokio::test]
8330        async fn test_count_table_rows_basic() {
8331            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8332
8333            let request = CountTableRowsRequest {
8334                id: Some(table_id),
8335                version: None,
8336                predicate: None,
8337                ..Default::default()
8338            };
8339
8340            let count = namespace.count_table_rows(request).await.unwrap();
8341            assert_eq!(count, 3);
8342        }
8343
8344        #[tokio::test]
8345        async fn test_count_table_rows_with_predicate() {
8346            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8347
8348            let request = CountTableRowsRequest {
8349                id: Some(table_id),
8350                version: None,
8351                predicate: Some("id > 1".to_string()),
8352                ..Default::default()
8353            };
8354
8355            let count = namespace.count_table_rows(request).await.unwrap();
8356            assert_eq!(count, 2);
8357        }
8358
8359        #[tokio::test]
8360        async fn test_query_table_invalid_distance_type() {
8361            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8362
8363            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8364                single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8365                multi_vector: None,
8366            });
8367
8368            let request = QueryTableRequest {
8369                id: Some(table_id),
8370                k: 2,
8371                vector,
8372                vector_column: Some("vector".to_string()),
8373                distance_type: Some("invalid_metric".to_string()),
8374                filter: None,
8375                offset: None,
8376                version: None,
8377                ..Default::default()
8378            };
8379
8380            let result = namespace.query_table(request).await;
8381            assert!(result.is_err());
8382            let err_msg = result.unwrap_err().to_string();
8383            assert!(
8384                err_msg.contains("Unknown distance type"),
8385                "Expected error about unknown distance type, got: {}",
8386                err_msg
8387            );
8388        }
8389
8390        #[tokio::test]
8391        async fn test_insert_into_table_append() {
8392            use arrow::array::{Int32Array, StringArray};
8393            use arrow::ipc::writer::StreamWriter;
8394
8395            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8396
8397            // Prepare new data to insert
8398            let schema = create_test_schema();
8399            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8400            let arrow_schema = Arc::new(arrow_schema);
8401
8402            let id_array = Int32Array::from(vec![4, 5]);
8403            let name_array = StringArray::from(vec!["Dave", "Eve"]);
8404            let batch = arrow::record_batch::RecordBatch::try_new(
8405                arrow_schema.clone(),
8406                vec![Arc::new(id_array), Arc::new(name_array)],
8407            )
8408            .unwrap();
8409
8410            let mut buffer = Vec::new();
8411            {
8412                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8413                writer.write(&batch).unwrap();
8414                writer.finish().unwrap();
8415            }
8416
8417            let request = InsertIntoTableRequest {
8418                id: Some(table_id.clone()),
8419                mode: Some("append".to_string()),
8420                ..Default::default()
8421            };
8422
8423            let response = namespace
8424                .insert_into_table(request, Bytes::from(buffer))
8425                .await
8426                .unwrap();
8427            assert!(response.transaction_id.is_none());
8428
8429            // Verify total rows
8430            let count_req = CountTableRowsRequest {
8431                id: Some(table_id),
8432                version: None,
8433                predicate: None,
8434                ..Default::default()
8435            };
8436            let count = namespace.count_table_rows(count_req).await.unwrap();
8437            assert_eq!(count, 5);
8438        }
8439
8440        #[tokio::test]
8441        async fn test_insert_into_table_overwrite() {
8442            use arrow::array::{Int32Array, StringArray};
8443            use arrow::ipc::writer::StreamWriter;
8444
8445            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8446
8447            let schema = create_test_schema();
8448            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8449            let arrow_schema = Arc::new(arrow_schema);
8450
8451            let id_array = Int32Array::from(vec![10, 20]);
8452            let name_array = StringArray::from(vec!["X", "Y"]);
8453            let batch = arrow::record_batch::RecordBatch::try_new(
8454                arrow_schema.clone(),
8455                vec![Arc::new(id_array), Arc::new(name_array)],
8456            )
8457            .unwrap();
8458
8459            let mut buffer = Vec::new();
8460            {
8461                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8462                writer.write(&batch).unwrap();
8463                writer.finish().unwrap();
8464            }
8465
8466            let request = InsertIntoTableRequest {
8467                id: Some(table_id.clone()),
8468                mode: Some("overwrite".to_string()),
8469                ..Default::default()
8470            };
8471
8472            namespace
8473                .insert_into_table(request, Bytes::from(buffer))
8474                .await
8475                .unwrap();
8476
8477            // Verify overwrite: only 2 rows remain
8478            let count_req = CountTableRowsRequest {
8479                id: Some(table_id),
8480                version: None,
8481                predicate: None,
8482                ..Default::default()
8483            };
8484            let count = namespace.count_table_rows(count_req).await.unwrap();
8485            assert_eq!(count, 2);
8486        }
8487
8488        #[tokio::test]
8489        async fn test_insert_into_table_empty_data() {
8490            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8491
8492            let request = InsertIntoTableRequest {
8493                id: Some(table_id),
8494                mode: None,
8495                ..Default::default()
8496            };
8497
8498            let result = namespace.insert_into_table(request, Bytes::new()).await;
8499            assert!(result.is_err());
8500            assert!(
8501                result
8502                    .unwrap_err()
8503                    .to_string()
8504                    .contains("Arrow IPC stream) is required")
8505            );
8506        }
8507
8508        #[tokio::test]
8509        async fn test_insert_into_table_with_storage_options() {
8510            use arrow::array::{Int32Array, StringArray};
8511            use arrow::ipc::writer::StreamWriter;
8512
8513            let temp_dir = TempStdDir::default();
8514
8515            // Build namespace with a (no-op) storage option so self.storage_options is Some
8516            let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
8517                .storage_option("allow_http", "true")
8518                .build()
8519                .await
8520                .unwrap();
8521
8522            // Create a table first
8523            let schema = create_test_schema();
8524            let ipc_data = create_test_ipc_data(&schema);
8525            let mut create_req = CreateTableRequest::new();
8526            let table_id = vec!["so_table".to_string()];
8527            create_req.id = Some(table_id.clone());
8528            namespace
8529                .create_table(create_req, Bytes::from(ipc_data))
8530                .await
8531                .unwrap();
8532
8533            // Insert with storage_options present — covers store_params closure
8534            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8535            let arrow_schema = Arc::new(arrow_schema);
8536
8537            let id_array = Int32Array::from(vec![10, 20]);
8538            let name_array = StringArray::from(vec!["X", "Y"]);
8539            let batch = arrow::record_batch::RecordBatch::try_new(
8540                arrow_schema.clone(),
8541                vec![Arc::new(id_array), Arc::new(name_array)],
8542            )
8543            .unwrap();
8544
8545            let mut buffer = Vec::new();
8546            {
8547                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8548                writer.write(&batch).unwrap();
8549                writer.finish().unwrap();
8550            }
8551
8552            let request = InsertIntoTableRequest {
8553                id: Some(table_id.clone()),
8554                mode: Some("append".to_string()),
8555                ..Default::default()
8556            };
8557
8558            let response = namespace
8559                .insert_into_table(request, Bytes::from(buffer))
8560                .await
8561                .unwrap();
8562            assert!(response.transaction_id.is_none());
8563
8564            // Verify rows were inserted
8565            let count_req = CountTableRowsRequest {
8566                id: Some(table_id),
8567                version: None,
8568                predicate: None,
8569                ..Default::default()
8570            };
8571            let count = namespace.count_table_rows(count_req).await.unwrap();
8572            assert_eq!(count, 2);
8573        }
8574
8575        #[tokio::test]
8576        async fn test_query_table_basic() {
8577            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8578
8579            let request = QueryTableRequest {
8580                id: Some(table_id),
8581                k: 10,
8582                filter: None,
8583                offset: None,
8584                version: None,
8585                ..Default::default()
8586            };
8587
8588            let bytes = namespace.query_table(request).await.unwrap();
8589
8590            // Decode IPC and verify
8591            let cursor = Cursor::new(bytes.to_vec());
8592            let reader = FileReader::try_new(cursor, None).unwrap();
8593            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8594            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8595            assert_eq!(total_rows, 3);
8596        }
8597
8598        #[tokio::test]
8599        async fn test_query_table_with_filter() {
8600            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8601
8602            let request = QueryTableRequest {
8603                id: Some(table_id),
8604                k: 10,
8605                filter: Some("id <= 2".to_string()),
8606                offset: None,
8607                version: None,
8608                ..Default::default()
8609            };
8610
8611            let bytes = namespace.query_table(request).await.unwrap();
8612
8613            let cursor = Cursor::new(bytes.to_vec());
8614            let reader = FileReader::try_new(cursor, None).unwrap();
8615            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8616            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8617            assert_eq!(total_rows, 2);
8618        }
8619
8620        #[tokio::test]
8621        async fn test_query_table_with_limit_and_offset() {
8622            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8623
8624            let request = QueryTableRequest {
8625                id: Some(table_id),
8626                k: 2,
8627                filter: None,
8628                offset: Some(1),
8629                version: None,
8630                ..Default::default()
8631            };
8632
8633            let bytes = namespace.query_table(request).await.unwrap();
8634
8635            let cursor = Cursor::new(bytes.to_vec());
8636            let reader = FileReader::try_new(cursor, None).unwrap();
8637            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8638            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8639            assert_eq!(total_rows, 2);
8640        }
8641
8642        #[tokio::test]
8643        async fn test_query_table_no_limit() {
8644            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8645
8646            // k=0 means no limit
8647            let request = QueryTableRequest {
8648                id: Some(table_id),
8649                k: 0,
8650                filter: None,
8651                offset: None,
8652                version: None,
8653                ..Default::default()
8654            };
8655
8656            let bytes = namespace.query_table(request).await.unwrap();
8657
8658            let cursor = Cursor::new(bytes.to_vec());
8659            let reader = FileReader::try_new(cursor, None).unwrap();
8660            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8661            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8662            assert_eq!(total_rows, 3);
8663        }
8664
8665        #[tokio::test]
8666        async fn test_query_table_with_columns() {
8667            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8668
8669            let columns = Box::new(lance_namespace::models::QueryTableRequestColumns {
8670                column_names: Some(vec!["id".to_string()]),
8671                column_aliases: None,
8672            });
8673
8674            let request = QueryTableRequest {
8675                id: Some(table_id),
8676                k: 10,
8677                filter: None,
8678                offset: None,
8679                version: None,
8680                columns: Some(columns),
8681                ..Default::default()
8682            };
8683
8684            let bytes = namespace.query_table(request).await.unwrap();
8685
8686            let cursor = Cursor::new(bytes.to_vec());
8687            let reader = FileReader::try_new(cursor, None).unwrap();
8688            let schema = reader.schema();
8689            assert_eq!(schema.fields().len(), 1);
8690            assert_eq!(schema.field(0).name(), "id");
8691            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8692            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8693            assert_eq!(total_rows, 3);
8694        }
8695
8696        #[tokio::test]
8697        async fn test_count_table_rows_with_version() {
8698            use arrow::array::{Int32Array, StringArray};
8699            use arrow::ipc::writer::StreamWriter;
8700
8701            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8702
8703            // Insert more data to create version 2
8704            let schema = create_test_schema();
8705            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8706            let arrow_schema = Arc::new(arrow_schema);
8707
8708            let id_array = Int32Array::from(vec![4, 5]);
8709            let name_array = StringArray::from(vec!["Dave", "Eve"]);
8710            let batch = arrow::record_batch::RecordBatch::try_new(
8711                arrow_schema.clone(),
8712                vec![Arc::new(id_array), Arc::new(name_array)],
8713            )
8714            .unwrap();
8715
8716            let mut buffer = Vec::new();
8717            {
8718                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8719                writer.write(&batch).unwrap();
8720                writer.finish().unwrap();
8721            }
8722
8723            let request = InsertIntoTableRequest {
8724                id: Some(table_id.clone()),
8725                mode: None,
8726                ..Default::default()
8727            };
8728            namespace
8729                .insert_into_table(request, Bytes::from(buffer))
8730                .await
8731                .unwrap();
8732
8733            // Version 1 should have 3 rows
8734            let count_req = CountTableRowsRequest {
8735                id: Some(table_id.clone()),
8736                version: Some(1),
8737                predicate: None,
8738                ..Default::default()
8739            };
8740            let count = namespace.count_table_rows(count_req).await.unwrap();
8741            assert_eq!(count, 3);
8742
8743            // Latest version should have 5 rows
8744            let count_req = CountTableRowsRequest {
8745                id: Some(table_id),
8746                version: None,
8747                predicate: None,
8748                ..Default::default()
8749            };
8750            let count = namespace.count_table_rows(count_req).await.unwrap();
8751            assert_eq!(count, 5);
8752        }
8753
8754        #[tokio::test]
8755        async fn test_query_table_with_version() {
8756            use arrow::array::{Int32Array, StringArray};
8757            use arrow::ipc::writer::StreamWriter;
8758
8759            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8760
8761            // Insert more data to create version 2
8762            let schema = create_test_schema();
8763            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8764            let arrow_schema = Arc::new(arrow_schema);
8765
8766            let id_array = Int32Array::from(vec![4, 5]);
8767            let name_array = StringArray::from(vec!["Dave", "Eve"]);
8768            let batch = arrow::record_batch::RecordBatch::try_new(
8769                arrow_schema.clone(),
8770                vec![Arc::new(id_array), Arc::new(name_array)],
8771            )
8772            .unwrap();
8773
8774            let mut buffer = Vec::new();
8775            {
8776                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8777                writer.write(&batch).unwrap();
8778                writer.finish().unwrap();
8779            }
8780
8781            let request = InsertIntoTableRequest {
8782                id: Some(table_id.clone()),
8783                mode: None,
8784                ..Default::default()
8785            };
8786            namespace
8787                .insert_into_table(request, Bytes::from(buffer))
8788                .await
8789                .unwrap();
8790
8791            // Query version 1 should return 3 rows
8792            let request = QueryTableRequest {
8793                id: Some(table_id.clone()),
8794                k: 100,
8795                filter: None,
8796                offset: None,
8797                version: Some(1),
8798                ..Default::default()
8799            };
8800
8801            let bytes = namespace.query_table(request).await.unwrap();
8802            let cursor = Cursor::new(bytes.to_vec());
8803            let reader = FileReader::try_new(cursor, None).unwrap();
8804            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8805            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8806            assert_eq!(total_rows, 3);
8807
8808            // Query latest version should return 5 rows
8809            let request = QueryTableRequest {
8810                id: Some(table_id),
8811                k: 100,
8812                filter: None,
8813                offset: None,
8814                version: None,
8815                ..Default::default()
8816            };
8817
8818            let bytes = namespace.query_table(request).await.unwrap();
8819            let cursor = Cursor::new(bytes.to_vec());
8820            let reader = FileReader::try_new(cursor, None).unwrap();
8821            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8822            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8823            assert_eq!(total_rows, 5);
8824        }
8825
8826        /// Helper to create a namespace with a table that has a vector column for
8827        /// vector search tests.
8828        async fn create_ns_with_vector_table() -> (DirectoryNamespace, TempStdDir, Vec<String>) {
8829            use arrow::array::{FixedSizeListArray, Float32Array, Int32Array};
8830            use arrow::ipc::writer::StreamWriter;
8831
8832            let (namespace, temp_dir) = create_test_namespace().await;
8833
8834            // Build schema: id (int32), vector (fixed_size_list<float32>[4])
8835            let arrow_schema = Arc::new(arrow::datatypes::Schema::new(vec![
8836                arrow::datatypes::Field::new("id", arrow::datatypes::DataType::Int32, false),
8837                arrow::datatypes::Field::new(
8838                    "vector",
8839                    arrow::datatypes::DataType::FixedSizeList(
8840                        Arc::new(arrow::datatypes::Field::new(
8841                            "item",
8842                            arrow::datatypes::DataType::Float32,
8843                            true,
8844                        )),
8845                        4,
8846                    ),
8847                    true,
8848                ),
8849            ]));
8850
8851            let id_array = Int32Array::from(vec![1, 2, 3]);
8852            let values = Float32Array::from(vec![
8853                1.0, 0.0, 0.0, 0.0, // vector for id=1
8854                0.0, 1.0, 0.0, 0.0, // vector for id=2
8855                0.0, 0.0, 1.0, 0.0, // vector for id=3
8856            ]);
8857            let vector_array = FixedSizeListArray::try_new(
8858                Arc::new(arrow::datatypes::Field::new(
8859                    "item",
8860                    arrow::datatypes::DataType::Float32,
8861                    true,
8862                )),
8863                4,
8864                Arc::new(values),
8865                None,
8866            )
8867            .unwrap();
8868
8869            let batch = arrow::record_batch::RecordBatch::try_new(
8870                arrow_schema.clone(),
8871                vec![Arc::new(id_array), Arc::new(vector_array)],
8872            )
8873            .unwrap();
8874
8875            let mut buffer = Vec::new();
8876            {
8877                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8878                writer.write(&batch).unwrap();
8879                writer.finish().unwrap();
8880            }
8881
8882            // Write as a Lance dataset directly
8883            let table_name = "vector_table";
8884            let table_uri = format!("{}/{}.lance", temp_dir.to_str().unwrap(), table_name);
8885            let reader = arrow::record_batch::RecordBatchIterator::new(
8886                vec![Ok(batch)],
8887                arrow_schema.clone(),
8888            );
8889            Dataset::write(reader, &table_uri, None).await.unwrap();
8890
8891            let table_id = vec![table_name.to_string()];
8892            (namespace, temp_dir, table_id)
8893        }
8894
8895        #[tokio::test]
8896        async fn test_query_table_vector_search() {
8897            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8898
8899            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8900                single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8901                multi_vector: None,
8902            });
8903
8904            let request = QueryTableRequest {
8905                id: Some(table_id),
8906                k: 2,
8907                vector,
8908                filter: None,
8909                offset: None,
8910                version: None,
8911                ..Default::default()
8912            };
8913
8914            let bytes = namespace.query_table(request).await.unwrap();
8915
8916            let cursor = Cursor::new(bytes.to_vec());
8917            let reader = FileReader::try_new(cursor, None).unwrap();
8918            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8919            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8920            assert_eq!(total_rows, 2);
8921        }
8922
8923        #[tokio::test]
8924        async fn test_query_table_vector_search_with_distance_type() {
8925            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8926
8927            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8928                single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8929                multi_vector: None,
8930            });
8931
8932            let request = QueryTableRequest {
8933                id: Some(table_id),
8934                k: 3,
8935                vector,
8936                filter: None,
8937                offset: None,
8938                version: None,
8939                distance_type: Some("cosine".to_string()),
8940                ..Default::default()
8941            };
8942
8943            let bytes = namespace.query_table(request).await.unwrap();
8944
8945            let cursor = Cursor::new(bytes.to_vec());
8946            let reader = FileReader::try_new(cursor, None).unwrap();
8947            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8948            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8949            assert_eq!(total_rows, 3);
8950        }
8951
8952        #[tokio::test]
8953        async fn test_query_table_vector_search_with_filter() {
8954            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8955
8956            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8957                single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8958                multi_vector: None,
8959            });
8960
8961            let request = QueryTableRequest {
8962                id: Some(table_id),
8963                k: 10,
8964                vector,
8965                filter: Some("id <= 2".to_string()),
8966                offset: None,
8967                version: None,
8968                ..Default::default()
8969            };
8970
8971            let bytes = namespace.query_table(request).await.unwrap();
8972
8973            let cursor = Cursor::new(bytes.to_vec());
8974            let reader = FileReader::try_new(cursor, None).unwrap();
8975            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8976            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8977            assert!(total_rows <= 2);
8978        }
8979
8980        #[tokio::test]
8981        async fn test_query_table_vector_search_with_nprobes_and_refine() {
8982            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8983
8984            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8985                single_vector: Some(vec![0.0, 1.0, 0.0, 0.0]),
8986                multi_vector: None,
8987            });
8988
8989            let request = QueryTableRequest {
8990                id: Some(table_id),
8991                k: 2,
8992                vector,
8993                filter: None,
8994                offset: None,
8995                version: None,
8996                nprobes: Some(1),
8997                refine_factor: Some(1),
8998                prefilter: Some(true),
8999                ..Default::default()
9000            };
9001
9002            let bytes = namespace.query_table(request).await.unwrap();
9003
9004            let cursor = Cursor::new(bytes.to_vec());
9005            let reader = FileReader::try_new(cursor, None).unwrap();
9006            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
9007            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9008            assert_eq!(total_rows, 2);
9009        }
9010
9011        #[tokio::test]
9012        async fn test_namespace_id() {
9013            let (namespace, _temp_dir) = create_test_namespace().await;
9014            let id = namespace.namespace_id();
9015            assert!(id.contains("DirectoryNamespace"));
9016            assert!(id.contains("root"));
9017        }
9018
9019        #[tokio::test]
9020        async fn test_query_table_empty_table() {
9021            let (namespace, _temp_dir) = create_test_namespace().await;
9022
9023            // Create table with empty IPC data (schema only, no rows)
9024            let schema = create_test_schema();
9025            let ipc_data = create_test_ipc_data(&schema);
9026            let mut create_request = CreateTableRequest::new();
9027            create_request.id = Some(vec!["empty_table".to_string()]);
9028            namespace
9029                .create_table(create_request, bytes::Bytes::from(ipc_data))
9030                .await
9031                .unwrap();
9032
9033            // Query the empty table — should hit the "no batches" else branch
9034            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
9035                single_vector: None,
9036                multi_vector: None,
9037            });
9038            let request = QueryTableRequest {
9039                id: Some(vec!["empty_table".to_string()]),
9040                k: 10,
9041                vector,
9042                ..Default::default()
9043            };
9044            let bytes = namespace.query_table(request).await.unwrap();
9045
9046            let cursor = Cursor::new(bytes.to_vec());
9047            let reader = FileReader::try_new(cursor, None).unwrap();
9048            let batches: Vec<_> = reader.collect::<std::result::Result<Vec<_>, _>>().unwrap();
9049            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9050            assert_eq!(total_rows, 0, "empty table should yield no rows");
9051        }
9052
9053        #[tokio::test]
9054        async fn test_query_table_with_plain_filter_no_vector() {
9055            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
9056
9057            // Query with filter but no vector (plain scan path + filter)
9058            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
9059                single_vector: None,
9060                multi_vector: None,
9061            });
9062            let request = QueryTableRequest {
9063                id: Some(table_id),
9064                k: 0,
9065                vector,
9066                filter: Some("id > 1".to_string()),
9067                ..Default::default()
9068            };
9069            let bytes = namespace.query_table(request).await.unwrap();
9070
9071            let cursor = Cursor::new(bytes.to_vec());
9072            let reader = FileReader::try_new(cursor, None).unwrap();
9073            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
9074            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9075            assert!(total_rows > 0);
9076            assert!(total_rows < 3);
9077        }
9078    }
9079
9080    /// Tests for multi-table transaction support via table_version_storage_enabled.
9081    mod multi_table_transactions {
9082        use super::*;
9083        use futures::TryStreamExt;
9084        use lance::dataset::builder::DatasetBuilder;
9085        use lance_namespace::models::CreateTableVersionRequest;
9086
9087        /// Helper to create a namespace with table_version_storage_enabled enabled
9088        async fn create_managed_namespace(temp_path: &str) -> Arc<DirectoryNamespace> {
9089            Arc::new(
9090                DirectoryNamespaceBuilder::new(temp_path)
9091                    .table_version_tracking_enabled(true)
9092                    .table_version_storage_enabled(true)
9093                    .manifest_enabled(true)
9094                    .build()
9095                    .await
9096                    .unwrap(),
9097            )
9098        }
9099
9100        /// Helper to create a table and get its staging manifest path
9101        async fn create_table_and_get_staging(
9102            namespace: Arc<dyn LanceNamespace>,
9103            table_name: &str,
9104        ) -> (Vec<String>, object_store::path::Path) {
9105            let schema = create_test_schema();
9106            let ipc_data = create_test_ipc_data(&schema);
9107            let mut create_req = CreateTableRequest::new();
9108            create_req.id = Some(vec![table_name.to_string()]);
9109            namespace
9110                .create_table(create_req, bytes::Bytes::from(ipc_data))
9111                .await
9112                .unwrap();
9113
9114            let table_id = vec![table_name.to_string()];
9115            let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
9116                .await
9117                .unwrap()
9118                .load()
9119                .await
9120                .unwrap();
9121
9122            // Find existing manifest and create a staging copy
9123            let versions_path = dataset.versions_dir();
9124            let manifest_metas: Vec<_> = dataset
9125                .object_store(None)
9126                .await
9127                .unwrap()
9128                .inner
9129                .list(Some(&versions_path))
9130                .try_collect()
9131                .await
9132                .unwrap();
9133
9134            let manifest_meta = manifest_metas
9135                .iter()
9136                .find(|m| {
9137                    m.location
9138                        .filename()
9139                        .map(|f| f.ends_with(".manifest"))
9140                        .unwrap_or(false)
9141                })
9142                .expect("No manifest file found");
9143
9144            let manifest_data = dataset
9145                .object_store(None)
9146                .await
9147                .unwrap()
9148                .inner
9149                .get(&manifest_meta.location)
9150                .await
9151                .unwrap()
9152                .bytes()
9153                .await
9154                .unwrap();
9155
9156            let staging_path = dataset
9157                .versions_dir()
9158                .join(format!("staging_{}", table_name));
9159            dataset
9160                .object_store(None)
9161                .await
9162                .unwrap()
9163                .inner
9164                .put(&staging_path, manifest_data.into())
9165                .await
9166                .unwrap();
9167
9168            (table_id, staging_path)
9169        }
9170
9171        #[tokio::test]
9172        async fn test_table_version_storage_enabled_requires_manifest() {
9173            // table_version_storage_enabled=true requires manifest_enabled=true
9174            let temp_dir = TempStdDir::default();
9175            let temp_path = temp_dir.to_str().unwrap();
9176
9177            let result = DirectoryNamespaceBuilder::new(temp_path)
9178                .table_version_storage_enabled(true)
9179                .manifest_enabled(false)
9180                .build()
9181                .await;
9182
9183            assert!(
9184                result.is_err(),
9185                "Should fail when table_version_storage_enabled=true but manifest_enabled=false"
9186            );
9187        }
9188
9189        #[tokio::test]
9190        async fn test_create_table_version_records_in_manifest() {
9191            // When table_version_storage_enabled is enabled, single create_table_version
9192            // should also record the version in __manifest
9193            let temp_dir = TempStrDir::default();
9194            let temp_path: &str = &temp_dir;
9195
9196            let namespace = create_managed_namespace(temp_path).await;
9197            let ns: Arc<dyn LanceNamespace> = namespace.clone();
9198
9199            let (table_id, staging_path) =
9200                create_table_and_get_staging(ns.clone(), "table_managed").await;
9201
9202            // Create version 2
9203            let mut create_req = CreateTableVersionRequest::new(2, staging_path.to_string());
9204            create_req.id = Some(table_id.clone());
9205            create_req.naming_scheme = Some("V2".to_string());
9206            let response = namespace.create_table_version(create_req).await.unwrap();
9207
9208            assert!(response.version.is_some());
9209            let version = response.version.unwrap();
9210            assert_eq!(version.version, 2);
9211
9212            // Verify the version is recorded in __manifest by querying it
9213            let manifest_ns = namespace.manifest_ns.as_ref().unwrap();
9214            let table_id_str = manifest::ManifestNamespace::str_object_id(&table_id);
9215            let versions = manifest_ns
9216                .query_table_versions(&table_id_str, false, None)
9217                .await
9218                .unwrap();
9219
9220            assert!(
9221                !versions.is_empty(),
9222                "Version should be recorded in __manifest"
9223            );
9224            let (ver, _path) = &versions[0];
9225            assert_eq!(*ver, 2, "Recorded version should be 2");
9226        }
9227    }
9228
9229    #[tokio::test]
9230    async fn test_list_all_tables() {
9231        use lance_namespace::models::ListTablesRequest;
9232
9233        let (namespace, _temp_dir) = create_test_namespace().await;
9234        create_scalar_table(&namespace, "alpha").await;
9235        create_scalar_table(&namespace, "beta").await;
9236
9237        let request = ListTablesRequest {
9238            id: Some(vec![]),
9239            page_token: None,
9240            limit: None,
9241            ..Default::default()
9242        };
9243        let response = namespace.list_all_tables(request).await.unwrap();
9244        let mut tables = response.tables;
9245        tables.sort();
9246        assert_eq!(tables, vec!["alpha", "beta"]);
9247    }
9248
9249    #[tokio::test]
9250    async fn test_restore_table() {
9251        use lance_namespace::models::RestoreTableRequest;
9252
9253        let (namespace, _temp_dir) = create_test_namespace().await;
9254        create_scalar_table(&namespace, "users").await;
9255
9256        // Create a second version by creating a scalar index (this adds a new version)
9257        create_scalar_index(&namespace, "users", "users_id_idx").await;
9258
9259        let dataset = open_dataset(&namespace, "users").await;
9260        let current_version = dataset.version().version;
9261        assert!(current_version >= 2, "Should have at least 2 versions");
9262
9263        // Restore to version 1
9264        let mut restore_req = RestoreTableRequest::new(1);
9265        restore_req.id = Some(vec!["users".to_string()]);
9266        let response = namespace.restore_table(restore_req).await.unwrap();
9267
9268        // transaction_id should be present (the restore operation)
9269        assert!(
9270            response.transaction_id.is_some(),
9271            "restore_table should return a transaction_id"
9272        );
9273
9274        // Verify the dataset now has a new version (restore creates a new version)
9275        let dataset_after = open_dataset(&namespace, "users").await;
9276        assert!(
9277            dataset_after.version().version > current_version,
9278            "Restore should create a new version"
9279        );
9280    }
9281
9282    #[tokio::test]
9283    async fn test_update_table_schema_metadata() {
9284        use lance_namespace::models::UpdateTableSchemaMetadataRequest;
9285
9286        let (namespace, _temp_dir) = create_test_namespace().await;
9287        create_scalar_table(&namespace, "products").await;
9288
9289        let mut metadata = HashMap::new();
9290        metadata.insert("owner".to_string(), "team_a".to_string());
9291        metadata.insert("version".to_string(), "1.0".to_string());
9292
9293        let mut req = UpdateTableSchemaMetadataRequest::new();
9294        req.id = Some(vec!["products".to_string()]);
9295        req.metadata = Some(metadata.clone());
9296
9297        let response = namespace.update_table_schema_metadata(req).await.unwrap();
9298
9299        assert!(response.metadata.is_some());
9300        let returned = response.metadata.unwrap();
9301        assert_eq!(returned.get("owner"), Some(&"team_a".to_string()));
9302        assert_eq!(returned.get("version"), Some(&"1.0".to_string()));
9303        assert!(
9304            response.transaction_id.is_some(),
9305            "update_table_schema_metadata should return a transaction_id"
9306        );
9307    }
9308
9309    #[tokio::test]
9310    async fn test_get_table_stats() {
9311        use lance_namespace::models::GetTableStatsRequest;
9312
9313        let (namespace, _temp_dir) = create_test_namespace().await;
9314        create_scalar_table(&namespace, "items").await;
9315        create_scalar_index(&namespace, "items", "items_id_idx").await;
9316
9317        let mut req = GetTableStatsRequest::new();
9318        req.id = Some(vec!["items".to_string()]);
9319
9320        let response = namespace.get_table_stats(req).await.unwrap();
9321        assert_eq!(response.num_rows, 3);
9322        assert_eq!(response.num_indices, 1);
9323    }
9324
9325    #[tokio::test]
9326    async fn test_explain_table_query_plan() {
9327        use lance_namespace::models::QueryTableRequestVector;
9328        use lance_namespace::models::{ExplainTableQueryPlanRequest, QueryTableRequest};
9329
9330        let (namespace, _temp_dir) = create_test_namespace().await;
9331        create_scalar_table(&namespace, "catalog").await;
9332
9333        let mut query = QueryTableRequest::new(1, QueryTableRequestVector::new());
9334        query.filter = Some("id > 1".to_string());
9335        query.columns = Some(Box::new(QueryTableRequestColumns {
9336            column_names: Some(vec!["id".to_string(), "name".to_string()]),
9337            column_aliases: None,
9338        }));
9339        query.with_row_id = Some(true);
9340
9341        let mut req = ExplainTableQueryPlanRequest::new(query);
9342        req.id = Some(vec!["catalog".to_string()]);
9343
9344        let plan_str = namespace.explain_table_query_plan(req).await.unwrap();
9345        assert_plan_contains_all(
9346            &plan_str,
9347            &[
9348                "ProjectionExec: expr=[id@0 as id, name@2 as name",
9349                "Take: columns=\"id, _rowid, (name)\"",
9350                "LanceRead: uri=",
9351                "projection=[id]",
9352                "row_id=true, row_addr=false",
9353                "full_filter=id > Int32(1)",
9354                "refine_filter=id > Int32(1)",
9355            ],
9356            "Filtered explain plan should preserve late materialization and filter pushdown",
9357        );
9358    }
9359
9360    #[tokio::test]
9361    async fn test_analyze_table_query_plan() {
9362        use lance_namespace::models::AnalyzeTableQueryPlanRequest;
9363        use lance_namespace::models::QueryTableRequestVector;
9364
9365        let (namespace, _temp_dir) = create_test_namespace().await;
9366        create_scalar_table(&namespace, "catalog").await;
9367
9368        let mut req = AnalyzeTableQueryPlanRequest::new(1, QueryTableRequestVector::new());
9369        req.id = Some(vec!["catalog".to_string()]);
9370        req.filter = Some("id > 0".to_string());
9371        req.columns = Some(Box::new(QueryTableRequestColumns {
9372            column_names: Some(vec!["id".to_string(), "name".to_string()]),
9373            column_aliases: None,
9374        }));
9375        req.with_row_id = Some(true);
9376
9377        let analysis_str = namespace.analyze_table_query_plan(req).await.unwrap();
9378        assert_plan_contains_all(
9379            &analysis_str,
9380            &[
9381                "AnalyzeExec verbose=true",
9382                "ProjectionExec: elapsed=",
9383                "expr=[id@0 as id, name@2 as name",
9384                "Take: elapsed=",
9385                "columns=\"id, _rowid, (name)\"",
9386                "CoalesceBatchesExec: elapsed=",
9387                "LanceRead: elapsed=",
9388                "projection=[id]",
9389                "row_id=true, row_addr=false",
9390                "full_filter=id > Int32(0)",
9391                "refine_filter=id > Int32(0)",
9392                "metrics=[output_rows=",
9393            ],
9394            "Filtered analyze plan should preserve late materialization and filter pushdown",
9395        );
9396    }
9397
9398    #[tokio::test]
9399    async fn test_dir_listing_no_extra_calls_without_migration() {
9400        let temp_dir = TempStdDir::default();
9401        let temp_path = temp_dir.to_str().unwrap();
9402        let root_uri = file_object_store_uri(temp_path);
9403        let listing_count = Arc::new(AtomicUsize::new(0));
9404        let session = build_listing_counting_session(listing_count.clone());
9405
9406        // Create a table using dir-listing-only namespace
9407        let dir_only_ns = DirectoryNamespaceBuilder::new(root_uri.clone())
9408            .session(session.clone())
9409            .manifest_enabled(false)
9410            .dir_listing_enabled(true)
9411            .build()
9412            .await
9413            .unwrap();
9414
9415        let schema = create_test_schema();
9416        let ipc_data = create_test_ipc_data(&schema);
9417        let mut create_req = CreateTableRequest::new();
9418        create_req.id = Some(vec!["test_table".to_string()]);
9419        dir_only_ns
9420            .create_table(create_req, Bytes::from(ipc_data))
9421            .await
9422            .unwrap();
9423
9424        // Build a namespace with both enabled but migration disabled (default)
9425        let hybrid_ns = DirectoryNamespaceBuilder::new(root_uri)
9426            .session(session)
9427            .manifest_enabled(true)
9428            .dir_listing_enabled(true)
9429            .dir_listing_to_manifest_migration_enabled(false)
9430            .build()
9431            .await
9432            .unwrap();
9433
9434        // Reset counter before the operation we want to measure
9435        listing_count.store(0, Ordering::SeqCst);
9436
9437        // table_exists should use dir listing directly, making only 1 listing call
9438        let mut exists_req = TableExistsRequest::new();
9439        exists_req.id = Some(vec!["test_table".to_string()]);
9440        hybrid_ns.table_exists(exists_req).await.unwrap();
9441
9442        let count = listing_count.load(Ordering::SeqCst);
9443        assert_eq!(
9444            count, 1,
9445            "Expected exactly 1 listing call for table_exists \
9446             without migration mode, but got {}",
9447            count
9448        );
9449
9450        // Reset and test describe_table
9451        listing_count.store(0, Ordering::SeqCst);
9452
9453        let mut describe_req = DescribeTableRequest::new();
9454        describe_req.id = Some(vec!["test_table".to_string()]);
9455        hybrid_ns.describe_table(describe_req).await.unwrap();
9456
9457        let count = listing_count.load(Ordering::SeqCst);
9458        assert_eq!(
9459            count, 1,
9460            "Expected exactly 1 listing call for describe_table \
9461             without migration mode, but got {}",
9462            count
9463        );
9464    }
9465
9466    #[tokio::test]
9467    async fn test_describe_declared_table_checks_versions_only_when_requested() {
9468        let temp_dir = TempStdDir::default();
9469        let temp_path = temp_dir.to_str().unwrap();
9470        let root_uri = file_object_store_uri(temp_path);
9471        let listing_count = Arc::new(AtomicUsize::new(0));
9472        let session = build_listing_counting_session(listing_count.clone());
9473
9474        let namespace = DirectoryNamespaceBuilder::new(root_uri)
9475            .session(session)
9476            .manifest_enabled(false)
9477            .dir_listing_enabled(true)
9478            .build()
9479            .await
9480            .unwrap();
9481
9482        let mut declare_req = DeclareTableRequest::new();
9483        declare_req.id = Some(vec!["test_table".to_string()]);
9484        namespace.declare_table(declare_req).await.unwrap();
9485
9486        listing_count.store(0, Ordering::SeqCst);
9487
9488        let mut describe_req = DescribeTableRequest::new();
9489        describe_req.id = Some(vec!["test_table".to_string()]);
9490        let describe_response = namespace.describe_table(describe_req).await.unwrap();
9491
9492        assert_eq!(describe_response.is_only_declared, None);
9493        assert_eq!(
9494            listing_count.load(Ordering::SeqCst),
9495            1,
9496            "Default describe_table should only list the table directory"
9497        );
9498
9499        listing_count.store(0, Ordering::SeqCst);
9500
9501        let mut describe_req = DescribeTableRequest::new();
9502        describe_req.id = Some(vec!["test_table".to_string()]);
9503        describe_req.check_declared = Some(true);
9504        let describe_response = namespace.describe_table(describe_req).await.unwrap();
9505
9506        assert_eq!(describe_response.is_only_declared, Some(true));
9507        assert_eq!(
9508            listing_count.load(Ordering::SeqCst),
9509            2,
9510            "check_declared describe_table should list the table directory and _versions"
9511        );
9512    }
9513
9514    #[tokio::test]
9515    async fn test_dir_listing_extra_calls_with_migration() {
9516        let temp_dir = TempStdDir::default();
9517        let temp_path = temp_dir.to_str().unwrap();
9518        let root_uri = file_object_store_uri(temp_path);
9519        let listing_count = Arc::new(AtomicUsize::new(0));
9520        let session = build_listing_counting_session(listing_count.clone());
9521
9522        // Create a table using dir-listing-only namespace so it exists physically but is absent from __manifest.
9523        let dir_only_ns = DirectoryNamespaceBuilder::new(root_uri.clone())
9524            .session(session.clone())
9525            .manifest_enabled(false)
9526            .dir_listing_enabled(true)
9527            .build()
9528            .await
9529            .unwrap();
9530
9531        let schema = create_test_schema();
9532        let ipc_data = create_test_ipc_data(&schema);
9533        let mut create_req = CreateTableRequest::new();
9534        create_req.id = Some(vec!["test_table".to_string()]);
9535        dir_only_ns
9536            .create_table(create_req, Bytes::from(ipc_data))
9537            .await
9538            .unwrap();
9539
9540        let hybrid_ns = DirectoryNamespaceBuilder::new(root_uri)
9541            .session(session)
9542            .manifest_enabled(true)
9543            .dir_listing_enabled(true)
9544            .dir_listing_to_manifest_migration_enabled(true)
9545            .build()
9546            .await
9547            .unwrap();
9548
9549        // table_exists first checks __manifest (which on local FS uses the
9550        // version hint and does no list call), then falls back to the table
9551        // directory (one list_with_delimiter on test_table.lance).
9552        listing_count.store(0, Ordering::SeqCst);
9553
9554        let mut exists_req = TableExistsRequest::new();
9555        exists_req.id = Some(vec!["test_table".to_string()]);
9556        hybrid_ns.table_exists(exists_req).await.unwrap();
9557
9558        let count = listing_count.load(Ordering::SeqCst);
9559        assert_eq!(
9560            count, 1,
9561            "Expected exactly 1 listing call for table_exists with migration mode \
9562             (table directory fallback; manifest reload uses the version hint), but got {}",
9563            count
9564        );
9565
9566        // describe_table follows the same path when the table is not yet registered in __manifest.
9567        listing_count.store(0, Ordering::SeqCst);
9568
9569        let mut describe_req = DescribeTableRequest::new();
9570        describe_req.id = Some(vec!["test_table".to_string()]);
9571        hybrid_ns.describe_table(describe_req).await.unwrap();
9572
9573        let count = listing_count.load(Ordering::SeqCst);
9574        assert_eq!(
9575            count, 1,
9576            "Expected exactly 1 listing call for describe_table with migration mode \
9577             (table directory fallback; manifest reload uses the version hint), but got {}",
9578            count
9579        );
9580    }
9581
9582    #[tokio::test]
9583    async fn test_migration_not_found_errors_include_table_id() {
9584        let temp_dir = TempStdDir::default();
9585        let temp_path = temp_dir.to_str().unwrap();
9586
9587        let namespace = DirectoryNamespaceBuilder::new(temp_path)
9588            .manifest_enabled(true)
9589            .dir_listing_enabled(true)
9590            .dir_listing_to_manifest_migration_enabled(true)
9591            .build()
9592            .await
9593            .unwrap();
9594
9595        let mut exists_req = TableExistsRequest::new();
9596        exists_req.id = Some(vec!["missing_table".to_string()]);
9597        let err = namespace.table_exists(exists_req).await.unwrap_err();
9598        assert!(matches!(err, Error::Namespace { .. }));
9599        let err_msg = err.to_string();
9600        assert!(err_msg.contains("Table not found"));
9601        assert!(err_msg.contains("table id 'missing_table'"));
9602
9603        let mut describe_req = DescribeTableRequest::new();
9604        describe_req.id = Some(vec!["missing_table".to_string()]);
9605        let err = namespace.describe_table(describe_req).await.unwrap_err();
9606        assert!(matches!(err, Error::Namespace { .. }));
9607        let err_msg = err.to_string();
9608        assert!(err_msg.contains("Table not found"));
9609        assert!(err_msg.contains("table id 'missing_table'"));
9610    }
9611
9612    #[tokio::test]
9613    async fn test_manifest_not_found_errors_include_full_table_id() {
9614        use lance_namespace::models::CreateNamespaceRequest;
9615
9616        let temp_dir = TempStdDir::default();
9617        let temp_path = temp_dir.to_str().unwrap();
9618
9619        let namespace = DirectoryNamespaceBuilder::new(temp_path)
9620            .manifest_enabled(true)
9621            .dir_listing_enabled(true)
9622            .build()
9623            .await
9624            .unwrap();
9625
9626        let mut create_ns_req = CreateNamespaceRequest::new();
9627        create_ns_req.id = Some(vec!["workspace".to_string()]);
9628        namespace.create_namespace(create_ns_req).await.unwrap();
9629
9630        let missing_table_id = vec!["workspace".to_string(), "missing_table".to_string()];
9631
9632        let mut exists_req = TableExistsRequest::new();
9633        exists_req.id = Some(missing_table_id.clone());
9634        let err = namespace.table_exists(exists_req).await.unwrap_err();
9635        assert!(matches!(err, Error::Namespace { .. }));
9636        let err_msg = err.to_string();
9637        assert!(err_msg.contains("Table not found"));
9638        assert!(err_msg.contains("table id 'workspace$missing_table'"));
9639
9640        let mut describe_req = DescribeTableRequest::new();
9641        describe_req.id = Some(missing_table_id);
9642        let err = namespace.describe_table(describe_req).await.unwrap_err();
9643        assert!(matches!(err, Error::Namespace { .. }));
9644        let err_msg = err.to_string();
9645        assert!(err_msg.contains("Table not found"));
9646        assert!(err_msg.contains("table id 'workspace$missing_table'"));
9647    }
9648}