Skip to main content

lance_namespace_impls/
dir.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Directory-based Lance Namespace implementation.
5//!
6//! This module provides a directory-based implementation of the Lance namespace
7//! that stores tables as Lance datasets in a filesystem directory structure.
8
9pub mod manifest;
10
11use arrow::array::Float32Array;
12use arrow::record_batch::RecordBatchIterator;
13use arrow_ipc::reader::StreamReader;
14use async_trait::async_trait;
15use bytes::Bytes;
16use futures::{StreamExt, TryStreamExt};
17use lance::dataset::builder::DatasetBuilder;
18use lance::dataset::scanner::Scanner;
19use lance::dataset::statistics::DatasetStatisticsExt;
20use lance::dataset::transaction::{Operation, Transaction};
21use lance::dataset::{
22    Dataset, MergeInsertBuilder, WhenMatched, WhenNotMatched, WhenNotMatchedBySource, WriteMode,
23    WriteParams,
24};
25use lance::index::{DatasetIndexExt, IndexParams, vector::VectorIndexParams};
26use lance::session::Session;
27use lance_index::scalar::{
28    BuiltinIndexType, FullTextSearchQuery, InvertedIndexParams, ScalarIndexParams,
29};
30use lance_index::vector::{
31    bq::RQBuildParams, hnsw::builder::HnswBuildParams, ivf::IvfBuildParams, pq::PQBuildParams,
32    sq::builder::SQBuildParams,
33};
34use lance_index::{IndexType, is_system_index};
35use lance_io::object_store::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry};
36use lance_linalg::distance::MetricType;
37use lance_table::io::commit::{ManifestNamingScheme, VERSIONS_DIR};
38use object_store::path::Path;
39use object_store::{Error as ObjectStoreError, ObjectStore as OSObjectStore, PutMode, PutOptions};
40use std::collections::HashMap;
41use std::io::Cursor;
42use std::sync::{Arc, Mutex};
43
44use crate::context::DynamicContextProvider;
45use lance_namespace::models::{
46    AnalyzeTableQueryPlanRequest, BatchDeleteTableVersionsRequest,
47    BatchDeleteTableVersionsResponse, CountTableRowsRequest, CreateNamespaceRequest,
48    CreateNamespaceResponse, CreateTableIndexRequest, CreateTableIndexResponse, CreateTableRequest,
49    CreateTableResponse, CreateTableScalarIndexResponse, CreateTableVersionRequest,
50    CreateTableVersionResponse, DeclareTableRequest, DeclareTableResponse,
51    DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableIndexStatsRequest,
52    DescribeTableIndexStatsResponse, DescribeTableRequest, DescribeTableResponse,
53    DescribeTableVersionRequest, DescribeTableVersionResponse, DescribeTransactionRequest,
54    DescribeTransactionResponse, DropNamespaceRequest, DropNamespaceResponse,
55    DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, DropTableResponse,
56    ExplainTableQueryPlanRequest, FragmentStats, FragmentSummary, GetTableStatsRequest,
57    GetTableStatsResponse, Identity, IndexContent, InsertIntoTableRequest, InsertIntoTableResponse,
58    ListNamespacesRequest, ListNamespacesResponse, ListTableIndicesRequest,
59    ListTableIndicesResponse, ListTableVersionsRequest, ListTableVersionsResponse,
60    ListTablesRequest, ListTablesResponse, MergeInsertIntoTableRequest,
61    MergeInsertIntoTableResponse, NamespaceExistsRequest, QueryTableRequest,
62    QueryTableRequestColumns, QueryTableRequestVector, RestoreTableRequest, RestoreTableResponse,
63    TableExistsRequest, TableVersion, UpdateTableSchemaMetadataRequest,
64    UpdateTableSchemaMetadataResponse,
65};
66
67use lance_core::{Error, Result};
68use lance_namespace::LanceNamespace;
69use lance_namespace::error::NamespaceError;
70use lance_namespace::schema::arrow_schema_to_json;
71
72use crate::credentials::{
73    CredentialVendor, create_credential_vendor_for_location, has_credential_vendor_config,
74};
75
/// Thread-safe metrics tracker for namespace operations.
///
/// Tracks the count of each API operation when `ops_metrics_enabled` is true.
/// Use `retrieve()` to get a snapshot of all operation counts.
///
/// The counters live behind a [`Mutex`]. If another thread panicked while
/// holding that lock, the tracker keeps working: the poisoned guard is
/// recovered instead of silently turning every call into a no-op.
#[derive(Debug, Default)]
pub struct OpsMetrics {
    counters: Mutex<HashMap<String, u64>>,
}

impl OpsMetrics {
    /// Acquire the counter map, recovering the guard if the lock is poisoned.
    ///
    /// The map only holds plain integer counters, so the data remains usable
    /// even when a panic occurred while the lock was held.
    fn lock_counters(&self) -> std::sync::MutexGuard<'_, HashMap<String, u64>> {
        self.counters
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Increment the counter for an operation.
    ///
    /// A counter that does not exist yet is created with a value of 1.
    pub fn increment(&self, operation: &str) {
        let mut counters = self.lock_counters();
        // Look up by `&str` first so the key is only allocated the first time
        // an operation is seen.
        match counters.get_mut(operation) {
            Some(count) => *count += 1,
            None => {
                counters.insert(operation.to_owned(), 1);
            }
        }
    }

    /// Get a snapshot of all operation counts.
    ///
    /// The returned map is a copy; later increments do not affect it.
    pub fn retrieve(&self) -> HashMap<String, u64> {
        self.lock_counters().clone()
    }

    /// Reset all counters by removing every entry.
    pub fn reset(&self) {
        self.lock_counters().clear();
    }
}
105
/// Snapshot of a table directory's status flags.
///
/// The flags are gathered together in one value so that callers do not check
/// existence and the marker files in separate steps, which could race.
pub(crate) struct TableStatus {
    /// `true` when the table directory exists, i.e. it contains any files.
    pub(crate) exists: bool,
    /// `true` when a `.lance-deregistered` marker file is present.
    pub(crate) is_deregistered: bool,
    /// `true` when a `.lance-reserved` marker file is present, meaning the
    /// table was declared but not written.
    pub(crate) has_reserved_file: bool,
}
118
119enum DirectoryIndexParams {
120    Scalar {
121        index_type: IndexType,
122        params: ScalarIndexParams,
123    },
124    Inverted(InvertedIndexParams),
125    Vector {
126        index_type: IndexType,
127        params: VectorIndexParams,
128    },
129}
130
131impl DirectoryIndexParams {
132    fn index_type(&self) -> IndexType {
133        match self {
134            Self::Scalar { index_type, .. } | Self::Vector { index_type, .. } => *index_type,
135            Self::Inverted(_) => IndexType::Inverted,
136        }
137    }
138
139    fn params(&self) -> &dyn IndexParams {
140        match self {
141            Self::Scalar { params, .. } => params,
142            Self::Inverted(params) => params,
143            Self::Vector { params, .. } => params,
144        }
145    }
146}
147
148/// Builder for creating a DirectoryNamespace.
149///
150/// This builder provides a fluent API for configuring and establishing
151/// connections to directory-based Lance namespaces.
152///
153/// # Examples
154///
155/// ```no_run
156/// # use lance_namespace_impls::DirectoryNamespaceBuilder;
157/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
158/// // Create a local directory namespace
159/// let namespace = DirectoryNamespaceBuilder::new("/path/to/data")
160///     .build()
161///     .await?;
162/// # Ok(())
163/// # }
164/// ```
165///
166/// ```no_run
167/// # use lance_namespace_impls::DirectoryNamespaceBuilder;
168/// # use lance::session::Session;
169/// # use std::sync::Arc;
170/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
171/// // Create with custom storage options and session
172/// let session = Arc::new(Session::default());
173/// let namespace = DirectoryNamespaceBuilder::new("s3://bucket/path")
174///     .storage_option("region", "us-west-2")
175///     .storage_option("access_key_id", "key")
176///     .session(session)
177///     .build()
178///     .await?;
179/// # Ok(())
180/// # }
181/// ```
182#[derive(Clone)]
183pub struct DirectoryNamespaceBuilder {
184    root: String,
185    storage_options: Option<HashMap<String, String>>,
186    session: Option<Arc<Session>>,
187    manifest_enabled: bool,
188    dir_listing_enabled: bool,
189    inline_optimization_enabled: bool,
190    table_version_tracking_enabled: bool,
191    /// When true, table versions are stored in the `__manifest` table instead of
192    /// relying on Lance's native version management.
193    table_version_storage_enabled: bool,
194    /// When true, enables migration mode where the namespace checks the manifest first
195    /// before falling back to directory listing for root-level tables. When false (default),
196    /// root-level tables use directory listing directly without checking the manifest,
197    /// avoiding extra object store calls.
198    dir_listing_to_manifest_migration_enabled: bool,
199    credential_vendor_properties: HashMap<String, String>,
200    context_provider: Option<Arc<dyn DynamicContextProvider>>,
201    commit_retries: Option<u32>,
202    /// When true, returns input storage options in describe_table/declare_table responses
203    /// when no credential vendor is configured. Useful for testing. Default: false.
204    vend_input_storage_options: bool,
205    /// When set, adds expires_at_millis to vended storage options. The value is calculated
206    /// as current_time_millis + this interval. This allows clients to know when to refresh
207    /// credentials by calling describe_table again. Only effective when vend_input_storage_options
208    /// is true.
209    vend_input_storage_options_refresh_interval_millis: Option<u64>,
210    /// When true, tracks operation metrics. Default: false.
211    ops_metrics_enabled: bool,
212}
213
214impl std::fmt::Debug for DirectoryNamespaceBuilder {
215    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216        f.debug_struct("DirectoryNamespaceBuilder")
217            .field("root", &self.root)
218            .field("storage_options", &self.storage_options)
219            .field("manifest_enabled", &self.manifest_enabled)
220            .field("dir_listing_enabled", &self.dir_listing_enabled)
221            .field(
222                "inline_optimization_enabled",
223                &self.inline_optimization_enabled,
224            )
225            .field(
226                "table_version_tracking_enabled",
227                &self.table_version_tracking_enabled,
228            )
229            .field(
230                "table_version_storage_enabled",
231                &self.table_version_storage_enabled,
232            )
233            .field(
234                "dir_listing_to_manifest_migration_enabled",
235                &self.dir_listing_to_manifest_migration_enabled,
236            )
237            .field(
238                "context_provider",
239                &self.context_provider.as_ref().map(|_| "Some(...)"),
240            )
241            .field(
242                "vend_input_storage_options",
243                &self.vend_input_storage_options,
244            )
245            .field(
246                "vend_input_storage_options_refresh_interval_millis",
247                &self.vend_input_storage_options_refresh_interval_millis,
248            )
249            .field("ops_metrics_enabled", &self.ops_metrics_enabled)
250            .finish()
251    }
252}
253
254impl DirectoryNamespaceBuilder {
255    /// Create a new DirectoryNamespaceBuilder with the specified root path.
256    ///
257    /// # Arguments
258    ///
259    /// * `root` - Root directory path (local path or cloud URI like s3://bucket/path)
260    pub fn new(root: impl Into<String>) -> Self {
261        Self {
262            root: root.into().trim_end_matches('/').to_string(),
263            storage_options: None,
264            session: None,
265            manifest_enabled: true,
266            dir_listing_enabled: true, // Default to enabled for backwards compatibility
267            inline_optimization_enabled: true,
268            table_version_tracking_enabled: false, // Default to disabled
269            table_version_storage_enabled: false,  // Default to disabled
270            dir_listing_to_manifest_migration_enabled: false, // Default to disabled
271            credential_vendor_properties: HashMap::new(),
272            context_provider: None,
273            commit_retries: None,
274            vend_input_storage_options: false,
275            vend_input_storage_options_refresh_interval_millis: None,
276            ops_metrics_enabled: false,
277        }
278    }
279
280    /// Enable or disable manifest-based listing.
281    ///
282    /// When enabled (default), the namespace uses a `__manifest` table to track tables.
283    /// When disabled, relies solely on directory scanning.
284    pub fn manifest_enabled(mut self, enabled: bool) -> Self {
285        self.manifest_enabled = enabled;
286        self
287    }
288
289    /// Enable or disable directory-based listing fallback.
290    ///
291    /// When enabled (default), falls back to directory scanning for tables not in the manifest.
292    /// When disabled, only consults the manifest table.
293    pub fn dir_listing_enabled(mut self, enabled: bool) -> Self {
294        self.dir_listing_enabled = enabled;
295        self
296    }
297
298    /// Enable or disable migration mode from directory listing to manifest.
299    ///
300    /// When enabled, root-level table operations check the manifest first before
301    /// falling back to directory listing. When disabled (default), root-level tables
302    /// use directory listing directly, avoiding extra object store calls.
303    /// Only relevant when both `manifest_enabled` and `dir_listing_enabled` are true.
304    pub fn dir_listing_to_manifest_migration_enabled(mut self, enabled: bool) -> Self {
305        self.dir_listing_to_manifest_migration_enabled = enabled;
306        self
307    }
308
309    /// Enable or disable inline optimization of the __manifest table.
310    ///
311    /// When enabled (default), performs compaction and indexing on the __manifest table
312    /// after every write operation to maintain optimal performance.
313    /// When disabled, manual optimization must be performed separately.
314    pub fn inline_optimization_enabled(mut self, enabled: bool) -> Self {
315        self.inline_optimization_enabled = enabled;
316        self
317    }
318
319    /// Enable or disable table version tracking through the namespace.
320    ///
321    /// When enabled, `describe_table` returns `managed_versioning: true` to indicate
322    /// that commits should go through the namespace's table version APIs rather than
323    /// direct object store operations.
324    ///
325    /// When disabled (default), `managed_versioning` is not set.
326    pub fn table_version_tracking_enabled(mut self, enabled: bool) -> Self {
327        self.table_version_tracking_enabled = enabled;
328        self
329    }
330
331    /// Enable or disable table version management through the `__manifest` table.
332    ///
333    /// When enabled, table versions are tracked as `table_version` entries in the
334    /// `__manifest` Lance table. This enables:
335    /// - Centralized version tracking instead of per-table `_versions/` directories
336    ///
337    /// Requires `manifest_enabled` to be true.
338    /// When disabled (default), version storage uses per-table storage operations.
339    pub fn table_version_storage_enabled(mut self, enabled: bool) -> Self {
340        self.table_version_storage_enabled = enabled;
341        self
342    }
343
344    /// Create a DirectoryNamespaceBuilder from properties HashMap.
345    ///
346    /// This method parses a properties map into builder configuration.
347    /// It expects:
348    /// - `root`: The root directory path (required)
349    /// - `manifest_enabled`: Enable manifest-based table tracking (optional, default: true)
350    /// - `dir_listing_enabled`: Enable directory listing for table discovery (optional, default: true)
351    /// - `inline_optimization_enabled`: Enable inline optimization of __manifest table (optional, default: true)
352    /// - `storage.*`: Storage options (optional, prefix will be stripped)
353    ///
354    /// Credential vendor properties (prefixed with `credential_vendor.`, prefix is stripped):
355    /// - `credential_vendor.enabled`: Set to "true" to enable credential vending (required)
356    /// - `credential_vendor.permission`: Permission level: read, write, or admin (default: read)
357    ///
358    /// AWS-specific properties (for s3:// locations):
359    /// - `credential_vendor.aws_role_arn`: AWS IAM role ARN (required for AWS)
360    /// - `credential_vendor.aws_external_id`: AWS external ID (optional)
361    /// - `credential_vendor.aws_region`: AWS region (optional)
362    /// - `credential_vendor.aws_role_session_name`: AWS role session name (optional)
363    /// - `credential_vendor.aws_duration_millis`: Credential duration in ms (default: 3600000, range: 15min-12hrs)
364    ///
365    /// GCP-specific properties (for gs:// locations):
366    /// - `credential_vendor.gcp_service_account`: Service account to impersonate (optional)
367    /// - `credential_vendor.gcp_workload_identity_provider`: Workload Identity Provider for OIDC token exchange (optional)
368    /// - `credential_vendor.gcp_impersonation_service_account`: Service account to impersonate after workload identity exchange (optional)
369    ///
370    /// Note: GCP uses Application Default Credentials (ADC). To use a service account key file,
371    /// set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable before starting.
372    /// GCP token duration cannot be configured; it's determined by the STS endpoint (typically 1 hour).
373    ///
374    /// Azure-specific properties (for az:// locations):
375    /// - `credential_vendor.azure_account_name`: Azure storage account name (required for Azure)
376    /// - `credential_vendor.azure_tenant_id`: Azure tenant ID (optional)
377    /// - `credential_vendor.azure_federated_client_id`: Client ID used for workload identity federation (optional)
378    /// - `credential_vendor.azure_duration_millis`: Credential duration in ms (default: 3600000, up to 7 days)
379    ///
380    /// # Arguments
381    ///
382    /// * `properties` - Configuration properties
383    /// * `session` - Optional Lance session to reuse object store registry
384    ///
385    /// # Returns
386    ///
387    /// Returns a `DirectoryNamespaceBuilder` instance.
388    ///
389    /// # Errors
390    ///
391    /// Returns an error if the `root` property is missing.
392    ///
393    /// # Examples
394    ///
395    /// ```no_run
396    /// # use lance_namespace_impls::DirectoryNamespaceBuilder;
397    /// # use std::collections::HashMap;
398    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
399    /// let mut properties = HashMap::new();
400    /// properties.insert("root".to_string(), "/path/to/data".to_string());
401    /// properties.insert("manifest_enabled".to_string(), "true".to_string());
402    /// properties.insert("dir_listing_enabled".to_string(), "false".to_string());
403    /// properties.insert("storage.region".to_string(), "us-west-2".to_string());
404    ///
405    /// let namespace = DirectoryNamespaceBuilder::from_properties(properties, None)?
406    ///     .build()
407    ///     .await?;
408    /// # Ok(())
409    /// # }
410    /// ```
411    pub fn from_properties(
412        properties: HashMap<String, String>,
413        session: Option<Arc<Session>>,
414    ) -> Result<Self> {
415        // Extract root from properties (required)
416        let root = properties.get("root").cloned().ok_or_else(|| {
417            lance_core::Error::from(NamespaceError::InvalidInput {
418                message: "Missing required property 'root' for directory namespace".to_string(),
419            })
420        })?;
421
422        // Extract storage options (properties prefixed with "storage.")
423        let storage_options: HashMap<String, String> = properties
424            .iter()
425            .filter_map(|(k, v)| {
426                k.strip_prefix("storage.")
427                    .map(|key| (key.to_string(), v.clone()))
428            })
429            .collect();
430
431        let storage_options = if storage_options.is_empty() {
432            None
433        } else {
434            Some(storage_options)
435        };
436
437        // Extract manifest_enabled (default: true)
438        let manifest_enabled = properties
439            .get("manifest_enabled")
440            .and_then(|v| v.parse::<bool>().ok())
441            .unwrap_or(true);
442
443        // Extract dir_listing_enabled (default: true)
444        let dir_listing_enabled = properties
445            .get("dir_listing_enabled")
446            .and_then(|v| v.parse::<bool>().ok())
447            .unwrap_or(true);
448
449        // Extract inline_optimization_enabled (default: true)
450        let inline_optimization_enabled = properties
451            .get("inline_optimization_enabled")
452            .and_then(|v| v.parse::<bool>().ok())
453            .unwrap_or(true);
454
455        // Extract table_version_tracking_enabled (default: false)
456        let table_version_tracking_enabled = properties
457            .get("table_version_tracking_enabled")
458            .and_then(|v| v.parse::<bool>().ok())
459            .unwrap_or(false);
460
461        // Extract table_version_storage_enabled (default: false)
462        let table_version_storage_enabled = properties
463            .get("table_version_storage_enabled")
464            .and_then(|v| v.parse::<bool>().ok())
465            .unwrap_or(false);
466
467        // Extract dir_listing_to_manifest_migration_enabled (default: false)
468        let dir_listing_to_manifest_migration_enabled = properties
469            .get("dir_listing_to_manifest_migration_enabled")
470            .and_then(|v| v.parse::<bool>().ok())
471            .unwrap_or(false);
472
473        // Extract credential vendor properties (properties prefixed with "credential_vendor.")
474        // The prefix is stripped to get short property names
475        // The build() method will check if enabled=true before creating the vendor
476        let credential_vendor_properties: HashMap<String, String> = properties
477            .iter()
478            .filter_map(|(k, v)| {
479                k.strip_prefix("credential_vendor.")
480                    .map(|key| (key.to_string(), v.clone()))
481            })
482            .collect();
483
484        let commit_retries = properties
485            .get("commit_retries")
486            .and_then(|v| v.parse::<u32>().ok());
487
488        // Extract vend_input_storage_options (default: false)
489        let vend_input_storage_options = properties
490            .get("vend_input_storage_options")
491            .and_then(|v| v.parse::<bool>().ok())
492            .unwrap_or(false);
493
494        // Extract vend_input_storage_options_refresh_interval_millis (optional)
495        let vend_input_storage_options_refresh_interval_millis = properties
496            .get("vend_input_storage_options_refresh_interval_millis")
497            .and_then(|v| v.parse::<u64>().ok());
498
499        // Extract ops_metrics_enabled (default: false)
500        let ops_metrics_enabled = properties
501            .get("ops_metrics_enabled")
502            .and_then(|v| v.parse::<bool>().ok())
503            .unwrap_or(false);
504
505        Ok(Self {
506            root: root.trim_end_matches('/').to_string(),
507            storage_options,
508            session,
509            manifest_enabled,
510            dir_listing_enabled,
511            inline_optimization_enabled,
512            table_version_tracking_enabled,
513            table_version_storage_enabled,
514            dir_listing_to_manifest_migration_enabled,
515            credential_vendor_properties,
516            context_provider: None,
517            commit_retries,
518            vend_input_storage_options,
519            vend_input_storage_options_refresh_interval_millis,
520            ops_metrics_enabled,
521        })
522    }
523
524    /// Add a storage option.
525    ///
526    /// # Arguments
527    ///
528    /// * `key` - Storage option key (e.g., "region", "access_key_id")
529    /// * `value` - Storage option value
530    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
531        self.storage_options
532            .get_or_insert_with(HashMap::new)
533            .insert(key.into(), value.into());
534        self
535    }
536
537    /// Add multiple storage options.
538    ///
539    /// # Arguments
540    ///
541    /// * `options` - HashMap of storage options to add
542    pub fn storage_options(mut self, options: HashMap<String, String>) -> Self {
543        self.storage_options
544            .get_or_insert_with(HashMap::new)
545            .extend(options);
546        self
547    }
548
549    /// Set the Lance session to use for this namespace.
550    ///
551    /// When a session is provided, the namespace will reuse the session's
552    /// object store registry, allowing multiple namespaces and datasets
553    /// to share the same underlying storage connections.
554    ///
555    /// # Arguments
556    ///
557    /// * `session` - Arc-wrapped Lance session
558    pub fn session(mut self, session: Arc<Session>) -> Self {
559        self.session = Some(session);
560        self
561    }
562
563    /// Set the number of retries for commit operations on the manifest table.
564    /// If not set, defaults to [`lance_table::io::commit::CommitConfig`] default (20).
565    pub fn commit_retries(mut self, retries: u32) -> Self {
566        self.commit_retries = Some(retries);
567        self
568    }
569
570    /// Add a credential vendor property.
571    ///
572    /// Use short property names without the `credential_vendor.` prefix.
573    /// Common properties: `enabled`, `permission`.
574    /// AWS properties: `aws_role_arn`, `aws_external_id`, `aws_region`, `aws_role_session_name`, `aws_duration_millis`.
575    /// GCP properties: `gcp_service_account`, `gcp_workload_identity_provider`, `gcp_impersonation_service_account`.
576    /// Azure properties: `azure_account_name`, `azure_tenant_id`, `azure_federated_client_id`, `azure_duration_millis`.
577    ///
578    /// # Arguments
579    ///
580    /// * `key` - Property key (e.g., "enabled", "aws_role_arn")
581    /// * `value` - Property value
582    ///
583    /// # Example
584    ///
585    /// ```no_run
586    /// # use lance_namespace_impls::DirectoryNamespaceBuilder;
587    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
588    /// let namespace = DirectoryNamespaceBuilder::new("s3://my-bucket/data")
589    ///     .credential_vendor_property("enabled", "true")
590    ///     .credential_vendor_property("aws_role_arn", "arn:aws:iam::123456789012:role/MyRole")
591    ///     .credential_vendor_property("permission", "read")
592    ///     .build()
593    ///     .await?;
594    /// # Ok(())
595    /// # }
596    /// ```
597    pub fn credential_vendor_property(
598        mut self,
599        key: impl Into<String>,
600        value: impl Into<String>,
601    ) -> Self {
602        self.credential_vendor_properties
603            .insert(key.into(), value.into());
604        self
605    }
606
607    /// Add multiple credential vendor properties.
608    ///
609    /// Use short property names without the `credential_vendor.` prefix.
610    ///
611    /// # Arguments
612    ///
613    /// * `properties` - HashMap of credential vendor properties to add
614    pub fn credential_vendor_properties(mut self, properties: HashMap<String, String>) -> Self {
615        self.credential_vendor_properties.extend(properties);
616        self
617    }
618
619    /// Set a dynamic context provider for per-request context.
620    ///
621    /// The provider can be used to generate additional context for operations.
622    /// For DirectoryNamespace, the context is stored but not directly used
623    /// in operations (unlike RestNamespace where it's converted to HTTP headers).
624    ///
625    /// # Arguments
626    ///
627    /// * `provider` - The context provider implementation
628    pub fn context_provider(mut self, provider: Arc<dyn DynamicContextProvider>) -> Self {
629        self.context_provider = Some(provider);
630        self
631    }
632
633    /// Enable or disable returning input storage options in responses.
634    ///
635    /// When enabled, `describe_table` and `declare_table` will return the storage
636    /// options passed to the builder when no credential vendor is configured.
637    /// This is useful for testing scenarios where you want to pass storage options
638    /// through to clients.
639    ///
640    /// Default is false (storage options are not returned unless credential vending is configured).
641    pub fn vend_input_storage_options(mut self, enabled: bool) -> Self {
642        self.vend_input_storage_options = enabled;
643        self
644    }
645
646    /// Set the refresh interval for vended input storage options.
647    ///
648    /// When set, vended storage options will include an `expires_at_millis` field
649    /// calculated as `current_time_millis + interval_millis`. This allows clients
650    /// to know when to refresh credentials by calling `describe_table` again.
651    ///
652    /// This only has effect when `vend_input_storage_options` is enabled.
653    ///
654    /// # Arguments
655    ///
656    /// * `interval_millis` - The refresh interval in milliseconds
657    pub fn vend_input_storage_options_refresh_interval_millis(
658        mut self,
659        interval_millis: u64,
660    ) -> Self {
661        self.vend_input_storage_options_refresh_interval_millis = Some(interval_millis);
662        self
663    }
664
665    /// Enable or disable operation metrics tracking.
666    ///
667    /// When enabled, the namespace will track how many times each API operation
668    /// is called. Use `retrieve_ops_metrics()` on the built namespace to get
669    /// the current counts.
670    ///
671    /// Default is false.
672    pub fn ops_metrics_enabled(mut self, enabled: bool) -> Self {
673        self.ops_metrics_enabled = enabled;
674        self
675    }
676
677    /// Build the DirectoryNamespace.
678    ///
679    /// # Returns
680    ///
681    /// Returns a `DirectoryNamespace` instance.
682    ///
683    /// # Errors
684    ///
685    /// Returns an error if:
686    /// - The root path is invalid
687    /// - Connection to the storage backend fails
688    /// - Storage options are invalid
689    pub async fn build(self) -> Result<DirectoryNamespace> {
690        // Validate: table_version_storage_enabled requires manifest_enabled
691        if self.table_version_storage_enabled && !self.manifest_enabled {
692            return Err(NamespaceError::InvalidInput {
693                message: "table_version_storage_enabled requires manifest_enabled=true".to_string(),
694            }
695            .into());
696        }
697
698        let (object_store, base_path) =
699            Self::initialize_object_store(&self.root, &self.storage_options, &self.session).await?;
700
701        let manifest_ns = if self.manifest_enabled {
702            match manifest::ManifestNamespace::from_directory(
703                self.root.clone(),
704                self.storage_options.clone(),
705                self.session.clone(),
706                object_store.clone(),
707                base_path.clone(),
708                self.dir_listing_enabled,
709                self.inline_optimization_enabled,
710                self.commit_retries,
711                self.table_version_storage_enabled,
712            )
713            .await
714            {
715                Ok(ns) => Some(Arc::new(ns)),
716                Err(e) => {
717                    // Failed to initialize manifest namespace, fall back to directory listing only
718                    log::warn!(
719                        "Failed to initialize manifest namespace, falling back to directory listing only: {}",
720                        e
721                    );
722                    None
723                }
724            }
725        } else {
726            None
727        };
728
729        // Create credential vendor once during initialization if enabled
730        let credential_vendor = if has_credential_vendor_config(&self.credential_vendor_properties)
731        {
732            create_credential_vendor_for_location(&self.root, &self.credential_vendor_properties)
733                .await?
734                .map(Arc::from)
735        } else {
736            None
737        };
738
739        let ops_metrics = if self.ops_metrics_enabled {
740            Some(Arc::new(OpsMetrics::default()))
741        } else {
742            None
743        };
744
745        Ok(DirectoryNamespace {
746            root: self.root,
747            storage_options: self.storage_options,
748            session: self.session,
749            object_store,
750            base_path,
751            manifest_ns,
752            dir_listing_enabled: self.dir_listing_enabled,
753            dir_listing_to_manifest_migration_enabled: self
754                .dir_listing_to_manifest_migration_enabled,
755            table_version_tracking_enabled: self.table_version_tracking_enabled,
756            table_version_storage_enabled: self.table_version_storage_enabled,
757            credential_vendor,
758            context_provider: self.context_provider,
759            vend_input_storage_options: self.vend_input_storage_options,
760            vend_input_storage_options_refresh_interval_millis: self
761                .vend_input_storage_options_refresh_interval_millis,
762            ops_metrics,
763        })
764    }
765
766    /// Initialize the Lance ObjectStore based on the configuration
767    async fn initialize_object_store(
768        root: &str,
769        storage_options: &Option<HashMap<String, String>>,
770        session: &Option<Arc<Session>>,
771    ) -> Result<(Arc<ObjectStore>, Path)> {
772        // Build ObjectStoreParams from storage options
773        let accessor = storage_options.clone().map(|opts| {
774            Arc::new(lance_io::object_store::StorageOptionsAccessor::with_static_options(opts))
775        });
776        let params = ObjectStoreParams {
777            storage_options_accessor: accessor,
778            ..Default::default()
779        };
780
781        // Use object store registry from session if provided, otherwise create a new one
782        let registry = if let Some(session) = session {
783            session.store_registry()
784        } else {
785            Arc::new(ObjectStoreRegistry::default())
786        };
787
788        // Use Lance's object store factory to create from URI
789        let (object_store, base_path) = ObjectStore::from_uri_and_params(registry, root, &params)
790            .await
791            .map_err(|e| {
792                lance_core::Error::from(NamespaceError::Internal {
793                    message: format!("Failed to create object store: {:?}", e),
794                })
795            })?;
796
797        Ok((object_store, base_path))
798    }
799}
800
801/// Directory-based implementation of Lance Namespace.
802///
803/// This implementation stores tables as Lance datasets in a directory structure.
804/// It supports local filesystems and cloud storage backends through Lance's object store.
805///
806/// ## Manifest-based Listing
807///
808/// When `manifest_enabled=true`, the namespace uses a special `__manifest` Lance table to track tables
809/// instead of scanning the filesystem. This provides:
810/// - Better performance for listing operations
811/// - Ability to track table metadata
812/// - Foundation for future features like namespaces and table renaming
813///
814/// When `dir_listing_enabled=true`, the namespace falls back to directory scanning for tables not
815/// found in the manifest, enabling gradual migration.
816///
817/// ## Credential Vending
818///
819/// When credential vendor properties are configured, `describe_table` will vend temporary
820/// credentials based on the table location URI. The vendor type is auto-selected:
821/// - `s3://` locations use AWS STS AssumeRole
822/// - `gs://` locations use GCP OAuth2 tokens
823/// - `az://` locations use Azure SAS tokens
824pub struct DirectoryNamespace {
825    root: String,
826    storage_options: Option<HashMap<String, String>>,
827    session: Option<Arc<Session>>,
828    object_store: Arc<ObjectStore>,
829    base_path: Path,
830    manifest_ns: Option<Arc<manifest::ManifestNamespace>>,
831    dir_listing_enabled: bool,
832    /// When true, root-level table operations check the manifest first before
833    /// falling back to directory listing. When false, root-level tables skip
834    /// the manifest check and use directory listing directly.
835    dir_listing_to_manifest_migration_enabled: bool,
836    /// When true, `describe_table` returns `managed_versioning: true` to indicate
837    /// commits should go through namespace table version APIs.
838    table_version_tracking_enabled: bool,
839    /// When true, table versions are stored in the `__manifest` table.
840    table_version_storage_enabled: bool,
841    /// Credential vendor created once during initialization.
842    /// Used to vend temporary credentials for table access.
843    credential_vendor: Option<Arc<dyn CredentialVendor>>,
844    /// Dynamic context provider for per-request context.
845    /// Stored but not directly used in operations (available for future extensions).
846    #[allow(dead_code)]
847    context_provider: Option<Arc<dyn DynamicContextProvider>>,
848    /// When true, returns input storage options in responses when no credential vendor is configured.
849    vend_input_storage_options: bool,
850    /// Refresh interval in milliseconds for vended input storage options.
851    /// When set, expires_at_millis is added to storage options.
852    vend_input_storage_options_refresh_interval_millis: Option<u64>,
853    /// Operation metrics tracker, created when ops_metrics_enabled is true.
854    ops_metrics: Option<Arc<OpsMetrics>>,
855}
856
857impl std::fmt::Debug for DirectoryNamespace {
858    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
859        write!(f, "{}", self.namespace_id())
860    }
861}
862
863impl std::fmt::Display for DirectoryNamespace {
864    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
865        write!(f, "{}", self.namespace_id())
866    }
867}
868
/// The set of version ranges to delete for one table.
/// Consumed by `batch_delete_table_versions` and `delete_physical_version_files`.
struct TableDeleteEntry {
    /// Identifier of the table whose versions are deleted.
    /// NOTE(review): the meaning of `None` is decided by the consumers — confirm there.
    table_id: Option<Vec<String>>,
    /// Version ranges as pairs of version numbers.
    /// NOTE(review): presumably `(start, end)`; bound inclusivity is not visible here — confirm.
    ranges: Vec<(i64, i64)>,
}
875
876impl DirectoryNamespace {
877    /// Apply pagination to a list of table names
878    ///
879    /// Sorts the list alphabetically and applies pagination using page_token (start_after) and limit.
880    ///
881    /// # Arguments
882    /// * `names` - The vector of table names to paginate
883    /// * `page_token` - Skip items until finding one greater than this value (start_after semantics)
884    /// * `limit` - Maximum number of items to keep
885    ///
886    /// # Returns
887    /// The next page token (last item in this page) if more results exist beyond the limit,
888    /// or `None` if this is the last page.
889    fn apply_pagination(
890        names: &mut Vec<String>,
891        page_token: Option<String>,
892        limit: Option<i32>,
893    ) -> Option<String> {
894        // Sort alphabetically for consistent ordering
895        names.sort();
896
897        // Apply page_token filtering (start_after semantics)
898        if let Some(start_after) = page_token {
899            if let Some(index) = names
900                .iter()
901                .position(|name| name.as_str() > start_after.as_str())
902            {
903                names.drain(0..index);
904            } else {
905                names.clear();
906            }
907        }
908
909        // Apply limit and compute next page token
910        if let Some(limit) = limit
911            && limit >= 0
912        {
913            let limit = limit as usize;
914            if names.len() > limit {
915                let next_page_token = if limit > 0 {
916                    Some(names[limit - 1].clone())
917                } else {
918                    None
919                };
920                names.truncate(limit);
921                return next_page_token;
922            }
923        }
924
925        None
926    }
927
928    /// List tables using directory scanning (fallback method)
929    async fn list_directory_tables(&self) -> Result<Vec<String>> {
930        let mut tables = Vec::new();
931        let entries = self
932            .object_store
933            .read_dir(self.base_path.clone())
934            .await
935            .map_err(|e| {
936                lance_core::Error::from(NamespaceError::Internal {
937                    message: format!("Failed to list directory: {:?}", e),
938                })
939            })?;
940
941        for entry in entries {
942            let path = entry.trim_end_matches('/');
943            if !path.ends_with(".lance") {
944                continue;
945            }
946
947            let table_name = &path[..path.len() - 6];
948
949            // Use atomic check to skip deregistered tables.
950            let status = self.check_table_status(table_name).await;
951            if status.is_deregistered {
952                continue;
953            }
954
955            tables.push(table_name.to_string());
956        }
957
958        Ok(tables)
959    }
960
961    /// Validate that the namespace ID represents the root namespace
962    fn validate_root_namespace_id(id: &Option<Vec<String>>) -> Result<()> {
963        if let Some(id) = id
964            && !id.is_empty()
965        {
966            return Err(NamespaceError::Unsupported {
967                message: format!(
968                    "Directory namespace only supports root namespace operations, but got namespace ID: {:?}. Expected empty ID.",
969                    id
970                ),
971            }
972            .into());
973        }
974        Ok(())
975    }
976
977    /// Extract table name from table ID
978    fn table_name_from_id(id: &Option<Vec<String>>) -> Result<String> {
979        let id = id.as_ref().ok_or_else(|| {
980            lance_core::Error::from(NamespaceError::InvalidInput {
981                message: "Directory namespace table ID cannot be empty".to_string(),
982            })
983        })?;
984
985        if id.len() != 1 {
986            return Err(NamespaceError::Unsupported {
987                message: format!(
988                    "Multi-level table IDs are only supported when manifest mode is enabled, but got: {:?}",
989                    id
990                ),
991            }
992            .into());
993        }
994
995        Ok(id[0].clone())
996    }
997
998    fn format_table_id(table_id: &[String]) -> String {
999        format!(
1000            "table id '{}'",
1001            manifest::ManifestNamespace::str_object_id(table_id)
1002        )
1003    }
1004
1005    fn format_table_id_from_request(id: &Option<Vec<String>>) -> String {
1006        id.as_ref()
1007            .map(|table_id| Self::format_table_id(table_id))
1008            .unwrap_or_else(|| "table id '<unknown>'".to_string())
1009    }
1010
1011    async fn resolve_table_location(&self, id: &Option<Vec<String>>) -> Result<String> {
1012        let mut describe_req = DescribeTableRequest::new();
1013        describe_req.id = id.clone();
1014        describe_req.load_detailed_metadata = Some(false);
1015
1016        // Use internal impl to avoid counting this as an external API call
1017        let describe_resp = self.describe_table_impl(describe_req).await?;
1018
1019        describe_resp.location.ok_or_else(|| {
1020            lance_core::Error::from(NamespaceError::TableNotFound {
1021                message: format!("Table location not found for: {:?}", id),
1022            })
1023        })
1024    }
1025
1026    async fn table_has_actual_manifests(&self, table_name: &str) -> Result<bool> {
1027        manifest::ManifestNamespace::path_has_actual_manifests(
1028            &self.object_store,
1029            &self.table_path(table_name),
1030        )
1031        .await
1032    }
1033
1034    async fn filter_declared_tables(
1035        &self,
1036        tables: Vec<String>,
1037        include_declared: bool,
1038    ) -> Result<Vec<String>> {
1039        if include_declared {
1040            return Ok(tables);
1041        }
1042
1043        let mut stream = futures::stream::iter(tables.into_iter().map(|table_name| async move {
1044            // `include_declared=false` is an explicit opt-in. We still pay one `_versions/` probe
1045            // per table here so declared-state is derived from actual manifests. This is linear in
1046            // the total number of listed tables, but we probe a bounded number concurrently.
1047            if self.table_has_actual_manifests(&table_name).await? {
1048                Ok::<Option<String>, Error>(Some(table_name))
1049            } else {
1050                Ok::<Option<String>, Error>(None)
1051            }
1052        }))
1053        .buffered(manifest::DECLARED_FILTER_CONCURRENCY);
1054
1055        let mut filtered = Vec::new();
1056        while let Some(result) = stream.next().await {
1057            if let Some(table_name) = result? {
1058                filtered.push(table_name);
1059            }
1060        }
1061        Ok(filtered)
1062    }
1063
1064    fn ipc_reader_from_request_data(
1065        request_data: &Bytes,
1066        operation: &str,
1067    ) -> Result<(
1068        Box<dyn arrow::record_batch::RecordBatchReader + Send>,
1069        usize,
1070    )> {
1071        if request_data.is_empty() {
1072            return Err(NamespaceError::InvalidInput {
1073                message: format!(
1074                    "Request data (Arrow IPC stream) is required for {}",
1075                    operation
1076                ),
1077            }
1078            .into());
1079        }
1080
1081        let cursor = Cursor::new(request_data.as_ref());
1082        let stream_reader =
1083            StreamReader::try_new(cursor, None).map_err(|e| NamespaceError::InvalidInput {
1084                message: format!("Invalid Arrow IPC stream: {}", e),
1085            })?;
1086        let arrow_schema = stream_reader.schema();
1087
1088        let mut num_rows = 0usize;
1089        let mut batches = Vec::new();
1090        for batch_result in stream_reader {
1091            let batch = batch_result.map_err(|e| NamespaceError::Internal {
1092                message: format!("Failed to read batch from IPC stream: {}", e),
1093            })?;
1094            num_rows += batch.num_rows();
1095            batches.push(batch);
1096        }
1097
1098        let reader: Box<dyn arrow::record_batch::RecordBatchReader + Send> = if batches.is_empty() {
1099            let batch = arrow::record_batch::RecordBatch::new_empty(arrow_schema.clone());
1100            Box::new(RecordBatchIterator::new(vec![Ok(batch)], arrow_schema))
1101        } else {
1102            let batch_results: Vec<_> = batches.into_iter().map(Ok).collect();
1103            Box::new(RecordBatchIterator::new(batch_results, arrow_schema))
1104        };
1105
1106        Ok((reader, num_rows))
1107    }
1108
1109    async fn table_uri_has_actual_manifests(&self, table_uri: &str) -> Result<bool> {
1110        let table_path = self.object_store_path_from_uri(table_uri)?;
1111        manifest::ManifestNamespace::path_has_actual_manifests(&self.object_store, &table_path)
1112            .await
1113    }
1114
1115    fn object_store_path_from_uri(&self, uri: &str) -> Result<Path> {
1116        let registry = self
1117            .session
1118            .as_ref()
1119            .map(|session| session.store_registry())
1120            .unwrap_or_else(|| Arc::new(ObjectStoreRegistry::default()));
1121        ObjectStore::extract_path_from_uri(registry, uri)
1122    }
1123
1124    fn validate_dir_only_properties(
1125        properties: Option<&HashMap<String, String>>,
1126        operation: &str,
1127    ) -> Result<()> {
1128        // Dir-only mode has no metadata catalog, so non-empty table properties would be accepted
1129        // and then lost. Reject them instead. Request-level storage options are different: they
1130        // directly affect the current write and remain supported in dir-only mode.
1131        if properties.is_some_and(|properties| !properties.is_empty()) {
1132            return Err(NamespaceError::Unsupported {
1133                message: format!(
1134                    "{} with non-empty table properties requires manifest_enabled=true",
1135                    operation
1136                ),
1137            }
1138            .into());
1139        }
1140        Ok(())
1141    }
1142
1143    async fn write_reader_to_table(
1144        &self,
1145        table_uri: &str,
1146        reader: Box<dyn arrow::record_batch::RecordBatchReader + Send>,
1147        mode: WriteMode,
1148        extra_storage_options: Option<HashMap<String, String>>,
1149    ) -> Result<Dataset> {
1150        // Insert and merge-insert request models do not carry request-level storage options,
1151        // so these writes intentionally use the namespace-level storage options only.
1152        let mut merged_storage_options = self.storage_options.clone().unwrap_or_default();
1153        if let Some(extra_storage_options) = extra_storage_options {
1154            merged_storage_options.extend(extra_storage_options);
1155        }
1156        let store_params = (!merged_storage_options.is_empty()).then(|| ObjectStoreParams {
1157            storage_options_accessor: Some(Arc::new(
1158                lance_io::object_store::StorageOptionsAccessor::with_static_options(
1159                    merged_storage_options,
1160                ),
1161            )),
1162            ..Default::default()
1163        });
1164
1165        let write_params = WriteParams {
1166            mode,
1167            store_params,
1168            session: self.session.clone(),
1169            ..Default::default()
1170        };
1171
1172        let dataset = Dataset::write(reader, table_uri, Some(write_params))
1173            .await
1174            .map_err(|e| NamespaceError::Internal {
1175                message: format!("Failed to write table at '{}': {}", table_uri, e),
1176            })?;
1177
1178        Ok(dataset)
1179    }
1180
1181    async fn list_table_versions_from_storage(
1182        &self,
1183        table_uri: &str,
1184        descending: bool,
1185        limit: Option<i32>,
1186    ) -> Result<Vec<TableVersion>> {
1187        let table_path = self.object_store_path_from_uri(table_uri)?;
1188        let versions_dir = table_path.child(VERSIONS_DIR);
1189        let manifest_metas: Vec<_> = self
1190            .object_store
1191            .read_dir_all(&versions_dir, None)
1192            .try_collect()
1193            .await
1194            .map_err(|e| {
1195                lance_core::Error::from(NamespaceError::Internal {
1196                    message: format!(
1197                        "Failed to list manifest files for table at '{}': {}",
1198                        table_uri, e
1199                    ),
1200                })
1201            })?;
1202
1203        let is_v2_naming = manifest_metas
1204            .first()
1205            .is_some_and(|meta| meta.location.filename().is_some_and(|f| f.len() == 29));
1206
1207        let mut table_versions: Vec<TableVersion> = manifest_metas
1208            .into_iter()
1209            .filter_map(|meta| {
1210                let filename = meta.location.filename()?;
1211                let version_str = filename.strip_suffix(".manifest")?;
1212                if version_str.starts_with('d') {
1213                    return None;
1214                }
1215                let file_version: u64 = version_str.parse().ok()?;
1216
1217                let actual_version = if file_version > u64::MAX / 2 {
1218                    u64::MAX - file_version
1219                } else {
1220                    file_version
1221                };
1222
1223                Some(TableVersion {
1224                    version: actual_version as i64,
1225                    manifest_path: meta.location.to_string(),
1226                    manifest_size: Some(meta.size as i64),
1227                    e_tag: meta.e_tag,
1228                    timestamp_millis: Some(meta.last_modified.timestamp_millis()),
1229                    metadata: None,
1230                })
1231            })
1232            .collect();
1233
1234        let list_is_ordered = self.object_store.list_is_lexically_ordered;
1235
1236        let needs_sort = if list_is_ordered {
1237            if is_v2_naming {
1238                !descending
1239            } else {
1240                descending
1241            }
1242        } else {
1243            true
1244        };
1245
1246        if needs_sort {
1247            if descending {
1248                table_versions.sort_by(|a, b| b.version.cmp(&a.version));
1249            } else {
1250                table_versions.sort_by(|a, b| a.version.cmp(&b.version));
1251            }
1252        }
1253
1254        if let Some(limit) = limit {
1255            table_versions.truncate(limit as usize);
1256        }
1257
1258        Ok(table_versions)
1259    }
1260
1261    /// Internal describe_table implementation that doesn't record metrics.
1262    /// Used by both the public describe_table (which records metrics) and
1263    /// internal callers like resolve_table_location (which shouldn't).
1264    async fn describe_table_impl(
1265        &self,
1266        request: DescribeTableRequest,
1267    ) -> Result<DescribeTableResponse> {
1268        let is_root_level = request.id.as_ref().is_some_and(|id| id.len() == 1);
1269        let skip_manifest_for_root = self.dir_listing_enabled
1270            && is_root_level
1271            && !self.dir_listing_to_manifest_migration_enabled;
1272        if let Some(ref manifest_ns) = self.manifest_ns
1273            && !skip_manifest_for_root
1274        {
1275            match manifest_ns.describe_table(request.clone()).await {
1276                Ok(mut response) => {
1277                    if let Some(ref table_uri) = response.table_uri {
1278                        // For backwards compatibility, only skip vending credentials when explicitly set to false
1279                        let vend = request.vend_credentials.unwrap_or(true);
1280                        let identity = request.identity.as_deref();
1281                        response.storage_options = self
1282                            .get_storage_options_for_table(table_uri, vend, identity)
1283                            .await?;
1284                    }
1285                    // Set managed_versioning flag when table_version_tracking_enabled
1286                    if self.table_version_tracking_enabled {
1287                        response.managed_versioning = Some(true);
1288                    }
1289                    return Ok(response);
1290                }
1291                Err(_) if self.dir_listing_enabled && is_root_level => {
1292                    // Fall through to directory check only for single-level IDs
1293                }
1294                Err(e) => return Err(e),
1295            }
1296        }
1297
1298        let table_name = Self::table_name_from_id(&request.id)?;
1299        let table_id = Self::format_table_id_from_request(&request.id);
1300        let table_uri = self.table_full_uri(&table_name);
1301
1302        // Atomically check table existence and deregistration status
1303        let status = self.check_table_status(&table_name).await;
1304
1305        if !status.exists {
1306            return Err(NamespaceError::TableNotFound {
1307                message: table_id.clone(),
1308            }
1309            .into());
1310        }
1311
1312        if status.is_deregistered {
1313            return Err(NamespaceError::TableNotFound {
1314                message: format!("Table is deregistered: {}", table_id),
1315            }
1316            .into());
1317        }
1318
1319        let load_detailed_metadata = request.load_detailed_metadata.unwrap_or(false);
1320        let should_check_declared =
1321            load_detailed_metadata || request.check_declared.unwrap_or(false);
1322        // For backwards compatibility, only skip vending credentials when explicitly set to false
1323        let vend_credentials = request.vend_credentials.unwrap_or(true);
1324        let identity = request.identity.as_deref();
1325        let is_only_declared = if should_check_declared {
1326            if status.has_reserved_file {
1327                Some(!self.table_has_actual_manifests(&table_name).await?)
1328            } else {
1329                Some(false)
1330            }
1331        } else {
1332            None
1333        };
1334
1335        if !load_detailed_metadata {
1336            let storage_options = self
1337                .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1338                .await?;
1339            return Ok(DescribeTableResponse {
1340                table: Some(table_name),
1341                namespace: request.id.as_ref().map(|id| {
1342                    if id.len() > 1 {
1343                        id[..id.len() - 1].to_vec()
1344                    } else {
1345                        vec![]
1346                    }
1347                }),
1348                location: Some(table_uri.clone()),
1349                table_uri: Some(table_uri),
1350                storage_options,
1351                is_only_declared,
1352                managed_versioning: if self.table_version_tracking_enabled {
1353                    Some(true)
1354                } else {
1355                    None
1356                },
1357                ..Default::default()
1358            });
1359        }
1360
1361        if is_only_declared == Some(true) {
1362            let storage_options = self
1363                .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1364                .await?;
1365            return Ok(DescribeTableResponse {
1366                table: Some(table_name),
1367                namespace: request.id.as_ref().map(|id| {
1368                    if id.len() > 1 {
1369                        id[..id.len() - 1].to_vec()
1370                    } else {
1371                        vec![]
1372                    }
1373                }),
1374                location: Some(table_uri.clone()),
1375                table_uri: Some(table_uri),
1376                storage_options,
1377                is_only_declared,
1378                managed_versioning: if self.table_version_tracking_enabled {
1379                    Some(true)
1380                } else {
1381                    None
1382                },
1383                ..Default::default()
1384            });
1385        }
1386
1387        // Try to load the dataset to get real information
1388        // Use DatasetBuilder with storage options to support S3 with custom endpoints
1389        let mut builder = DatasetBuilder::from_uri(&table_uri);
1390        if let Some(opts) = &self.storage_options {
1391            builder = builder.with_storage_options(opts.clone());
1392        }
1393        if let Some(sess) = &self.session {
1394            builder = builder.with_session(sess.clone());
1395        }
1396        match builder.load().await {
1397            Ok(mut dataset) => {
1398                // If a specific version is requested, checkout that version
1399                if let Some(requested_version) = request.version {
1400                    dataset = dataset
1401                        .checkout_version(requested_version as u64)
1402                        .await
1403                        .map_err(|e| {
1404                            lance_core::Error::from(NamespaceError::TableVersionNotFound {
1405                                message: format!(
1406                                    "Version {} not found for table '{}': {}",
1407                                    requested_version, table_name, e
1408                                ),
1409                            })
1410                        })?;
1411                }
1412
1413                let version_info = dataset.version();
1414                let lance_schema = dataset.schema();
1415                let arrow_schema: arrow_schema::Schema = lance_schema.into();
1416                let json_schema = arrow_schema_to_json(&arrow_schema)?;
1417                let storage_options = self
1418                    .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1419                    .await?;
1420
1421                // Convert BTreeMap to HashMap for the response
1422                let metadata: std::collections::HashMap<String, String> =
1423                    version_info.metadata.into_iter().collect();
1424
1425                Ok(DescribeTableResponse {
1426                    table: Some(table_name),
1427                    namespace: request.id.as_ref().map(|id| {
1428                        if id.len() > 1 {
1429                            id[..id.len() - 1].to_vec()
1430                        } else {
1431                            vec![]
1432                        }
1433                    }),
1434                    version: Some(version_info.version as i64),
1435                    location: Some(table_uri.clone()),
1436                    table_uri: Some(table_uri),
1437                    schema: Some(Box::new(json_schema)),
1438                    storage_options,
1439                    metadata: Some(metadata),
1440                    is_only_declared,
1441                    managed_versioning: if self.table_version_tracking_enabled {
1442                        Some(true)
1443                    } else {
1444                        None
1445                    },
1446                    ..Default::default()
1447                })
1448            }
1449            Err(err) => {
1450                if manifest::ManifestNamespace::is_not_found_load_error(&err)
1451                    && is_only_declared == Some(true)
1452                {
1453                    let storage_options = self
1454                        .get_storage_options_for_table(&table_uri, vend_credentials, identity)
1455                        .await?;
1456                    Ok(DescribeTableResponse {
1457                        table: Some(table_name),
1458                        namespace: request.id.as_ref().map(|id| {
1459                            if id.len() > 1 {
1460                                id[..id.len() - 1].to_vec()
1461                            } else {
1462                                vec![]
1463                            }
1464                        }),
1465                        location: Some(table_uri.clone()),
1466                        table_uri: Some(table_uri),
1467                        storage_options,
1468                        is_only_declared,
1469                        managed_versioning: if self.table_version_tracking_enabled {
1470                            Some(true)
1471                        } else {
1472                            None
1473                        },
1474                        ..Default::default()
1475                    })
1476                } else {
1477                    Err(NamespaceError::Internal {
1478                        message: format!(
1479                            "Table directory exists but cannot load dataset {}: {:?}",
1480                            table_name, err
1481                        ),
1482                    }
1483                    .into())
1484                }
1485            }
1486        }
1487    }
1488
1489    async fn load_dataset(
1490        &self,
1491        table_uri: &str,
1492        version: Option<i64>,
1493        operation: &str,
1494    ) -> Result<Dataset> {
1495        if let Some(version) = version
1496            && version < 0
1497        {
1498            return Err(NamespaceError::InvalidInput {
1499                message: format!(
1500                    "Table version for {} must be non-negative, got {}",
1501                    operation, version
1502                ),
1503            }
1504            .into());
1505        }
1506
1507        let mut builder = DatasetBuilder::from_uri(table_uri);
1508        if let Some(opts) = &self.storage_options {
1509            builder = builder.with_storage_options(opts.clone());
1510        }
1511        if let Some(sess) = &self.session {
1512            builder = builder.with_session(sess.clone());
1513        }
1514
1515        let dataset = builder.load().await.map_err(|e| {
1516            lance_core::Error::from(NamespaceError::TableNotFound {
1517                message: format!(
1518                    "Failed to open table at '{}' for {}: {}",
1519                    table_uri, operation, e
1520                ),
1521            })
1522        })?;
1523
1524        if let Some(version) = version {
1525            return dataset.checkout_version(version as u64).await.map_err(|e| {
1526                lance_core::Error::from(NamespaceError::TableVersionNotFound {
1527                    message: format!(
1528                        "Failed to checkout version {} for table at '{}' during {}: {}",
1529                        version, table_uri, operation, e
1530                    ),
1531                })
1532            });
1533        }
1534
1535        Ok(dataset)
1536    }
1537
1538    fn parse_index_type(index_type: &str) -> Result<IndexType> {
1539        match index_type.trim().to_ascii_uppercase().as_str() {
1540            "SCALAR" | "BTREE" => Ok(IndexType::BTree),
1541            "BITMAP" => Ok(IndexType::Bitmap),
1542            "LABEL_LIST" | "LABELLIST" => Ok(IndexType::LabelList),
1543            "INVERTED" | "FTS" => Ok(IndexType::Inverted),
1544            "NGRAM" => Ok(IndexType::NGram),
1545            "ZONEMAP" | "ZONE_MAP" => Ok(IndexType::ZoneMap),
1546            "BLOOMFILTER" | "BLOOM_FILTER" => Ok(IndexType::BloomFilter),
1547            "RTREE" | "R_TREE" => Ok(IndexType::RTree),
1548            "VECTOR" | "IVF_PQ" => Ok(IndexType::IvfPq),
1549            "IVF_FLAT" => Ok(IndexType::IvfFlat),
1550            "IVF_SQ" => Ok(IndexType::IvfSq),
1551            "IVF_RQ" => Ok(IndexType::IvfRq),
1552            "IVF_HNSW_FLAT" => Ok(IndexType::IvfHnswFlat),
1553            "IVF_HNSW_SQ" => Ok(IndexType::IvfHnswSq),
1554            "IVF_HNSW_PQ" => Ok(IndexType::IvfHnswPq),
1555            other => Err(NamespaceError::InvalidInput {
1556                message: format!("Unsupported index_type '{}'", other),
1557            }
1558            .into()),
1559        }
1560    }
1561
1562    fn parse_metric_type(distance_type: Option<&str>) -> Result<MetricType> {
1563        let distance_type = distance_type.unwrap_or("l2");
1564        MetricType::try_from(distance_type).map_err(|e| {
1565            lance_core::Error::from(NamespaceError::InvalidInput {
1566                message: format!(
1567                    "Unsupported distance_type '{}' for vector index: {}",
1568                    distance_type, e
1569                ),
1570            })
1571        })
1572    }
1573
1574    fn build_index_params(request: &CreateTableIndexRequest) -> Result<DirectoryIndexParams> {
1575        let index_type = Self::parse_index_type(&request.index_type)?;
1576        Ok(match index_type {
1577            IndexType::BTree => DirectoryIndexParams::Scalar {
1578                index_type,
1579                params: ScalarIndexParams::for_builtin(BuiltinIndexType::BTree),
1580            },
1581            IndexType::Bitmap => DirectoryIndexParams::Scalar {
1582                index_type,
1583                params: ScalarIndexParams::for_builtin(BuiltinIndexType::Bitmap),
1584            },
1585            IndexType::LabelList => DirectoryIndexParams::Scalar {
1586                index_type,
1587                params: ScalarIndexParams::for_builtin(BuiltinIndexType::LabelList),
1588            },
1589            IndexType::NGram => DirectoryIndexParams::Scalar {
1590                index_type,
1591                params: ScalarIndexParams::for_builtin(BuiltinIndexType::NGram),
1592            },
1593            IndexType::ZoneMap => DirectoryIndexParams::Scalar {
1594                index_type,
1595                params: ScalarIndexParams::for_builtin(BuiltinIndexType::ZoneMap),
1596            },
1597            IndexType::BloomFilter => DirectoryIndexParams::Scalar {
1598                index_type,
1599                params: ScalarIndexParams::for_builtin(BuiltinIndexType::BloomFilter),
1600            },
1601            IndexType::RTree => DirectoryIndexParams::Scalar {
1602                index_type,
1603                params: ScalarIndexParams::for_builtin(BuiltinIndexType::RTree),
1604            },
1605            IndexType::Inverted => {
1606                let mut params = InvertedIndexParams::default();
1607                if let Some(with_position) = request.with_position {
1608                    params = params.with_position(with_position);
1609                }
1610                if let Some(base_tokenizer) = &request.base_tokenizer {
1611                    params = params.base_tokenizer(base_tokenizer.clone());
1612                }
1613                if let Some(language) = &request.language {
1614                    params = params.language(language)?;
1615                }
1616                if let Some(max_token_length) = request.max_token_length {
1617                    if max_token_length < 0 {
1618                        return Err(NamespaceError::InvalidInput {
1619                            message: format!(
1620                                "FTS max_token_length must be non-negative, got {}",
1621                                max_token_length
1622                            ),
1623                        }
1624                        .into());
1625                    }
1626                    params = params.max_token_length(Some(max_token_length as usize));
1627                }
1628                if let Some(lower_case) = request.lower_case {
1629                    params = params.lower_case(lower_case);
1630                }
1631                if let Some(stem) = request.stem {
1632                    params = params.stem(stem);
1633                }
1634                if let Some(remove_stop_words) = request.remove_stop_words {
1635                    params = params.remove_stop_words(remove_stop_words);
1636                }
1637                if let Some(ascii_folding) = request.ascii_folding {
1638                    params = params.ascii_folding(ascii_folding);
1639                }
1640                DirectoryIndexParams::Inverted(params)
1641            }
1642            IndexType::IvfFlat => DirectoryIndexParams::Vector {
1643                index_type,
1644                params: VectorIndexParams::with_ivf_flat_params(
1645                    Self::parse_metric_type(request.distance_type.as_deref())?,
1646                    IvfBuildParams::default(),
1647                ),
1648            },
1649            IndexType::IvfPq => DirectoryIndexParams::Vector {
1650                index_type,
1651                params: VectorIndexParams::with_ivf_pq_params(
1652                    Self::parse_metric_type(request.distance_type.as_deref())?,
1653                    IvfBuildParams::default(),
1654                    PQBuildParams::default(),
1655                ),
1656            },
1657            IndexType::IvfSq => DirectoryIndexParams::Vector {
1658                index_type,
1659                params: VectorIndexParams::with_ivf_sq_params(
1660                    Self::parse_metric_type(request.distance_type.as_deref())?,
1661                    IvfBuildParams::default(),
1662                    SQBuildParams::default(),
1663                ),
1664            },
1665            IndexType::IvfRq => DirectoryIndexParams::Vector {
1666                index_type,
1667                params: VectorIndexParams::with_ivf_rq_params(
1668                    Self::parse_metric_type(request.distance_type.as_deref())?,
1669                    IvfBuildParams::default(),
1670                    RQBuildParams::default(),
1671                ),
1672            },
1673            IndexType::IvfHnswFlat => DirectoryIndexParams::Vector {
1674                index_type,
1675                params: VectorIndexParams::ivf_hnsw(
1676                    Self::parse_metric_type(request.distance_type.as_deref())?,
1677                    IvfBuildParams::default(),
1678                    HnswBuildParams::default(),
1679                ),
1680            },
1681            IndexType::IvfHnswSq => DirectoryIndexParams::Vector {
1682                index_type,
1683                params: VectorIndexParams::with_ivf_hnsw_sq_params(
1684                    Self::parse_metric_type(request.distance_type.as_deref())?,
1685                    IvfBuildParams::default(),
1686                    HnswBuildParams::default(),
1687                    SQBuildParams::default(),
1688                ),
1689            },
1690            IndexType::IvfHnswPq => DirectoryIndexParams::Vector {
1691                index_type,
1692                params: VectorIndexParams::with_ivf_hnsw_pq_params(
1693                    Self::parse_metric_type(request.distance_type.as_deref())?,
1694                    IvfBuildParams::default(),
1695                    HnswBuildParams::default(),
1696                    PQBuildParams::default(),
1697                ),
1698            },
1699            other => {
1700                return Err(NamespaceError::InvalidInput {
1701                    message: format!("Unsupported index type for namespace API: {}", other),
1702                }
1703                .into());
1704            }
1705        })
1706    }
1707
1708    fn paginate_indices(
1709        indices: &mut Vec<IndexContent>,
1710        page_token: Option<String>,
1711        limit: Option<i32>,
1712    ) -> Option<String> {
1713        indices.sort_by(|a, b| a.index_name.cmp(&b.index_name));
1714
1715        if let Some(start_after) = page_token {
1716            if let Some(index) = indices
1717                .iter()
1718                .position(|index| index.index_name.as_str() > start_after.as_str())
1719            {
1720                indices.drain(0..index);
1721            } else {
1722                indices.clear();
1723            }
1724        }
1725
1726        let mut next_page_token = None;
1727        if let Some(limit) = limit
1728            && limit >= 0
1729        {
1730            let limit = limit as usize;
1731            if limit > 0 && indices.len() > limit {
1732                next_page_token = Some(indices[limit - 1].index_name.clone());
1733            }
1734            indices.truncate(limit);
1735        }
1736        if indices.is_empty() {
1737            None
1738        } else {
1739            next_page_token
1740        }
1741    }
1742
1743    fn transaction_operation_name(transaction: &Transaction) -> String {
1744        match &transaction.operation {
1745            Operation::CreateIndex {
1746                new_indices,
1747                removed_indices,
1748            } if new_indices.is_empty() && !removed_indices.is_empty() => "DropIndex".to_string(),
1749            _ => transaction.operation.to_string(),
1750        }
1751    }
1752
1753    fn transaction_response(
1754        version: u64,
1755        transaction: &Transaction,
1756    ) -> DescribeTransactionResponse {
1757        let mut properties = transaction
1758            .transaction_properties
1759            .as_ref()
1760            .map(|properties| (**properties).clone())
1761            .unwrap_or_default();
1762        properties.insert("uuid".to_string(), transaction.uuid.clone());
1763        properties.insert("version".to_string(), version.to_string());
1764        properties.insert(
1765            "read_version".to_string(),
1766            transaction.read_version.to_string(),
1767        );
1768        properties.insert(
1769            "operation".to_string(),
1770            Self::transaction_operation_name(transaction),
1771        );
1772        if let Some(tag) = &transaction.tag {
1773            properties.insert("tag".to_string(), tag.clone());
1774        }
1775
1776        DescribeTransactionResponse {
1777            status: "SUCCEEDED".to_string(),
1778            properties: Some(properties),
1779        }
1780    }
1781
1782    fn describe_table_index_stats_response(
1783        stats: &serde_json::Value,
1784    ) -> DescribeTableIndexStatsResponse {
1785        let get_i64 = |key: &str| {
1786            stats.get(key).and_then(|value| {
1787                value
1788                    .as_i64()
1789                    .or_else(|| value.as_u64().and_then(|v| i64::try_from(v).ok()))
1790            })
1791        };
1792
1793        DescribeTableIndexStatsResponse {
1794            distance_type: stats
1795                .get("distance_type")
1796                .and_then(|value| value.as_str())
1797                .map(str::to_string),
1798            index_type: stats
1799                .get("index_type")
1800                .and_then(|value| value.as_str())
1801                .map(str::to_string),
1802            num_indexed_rows: get_i64("num_indexed_rows"),
1803            num_unindexed_rows: get_i64("num_unindexed_rows"),
1804            num_indices: get_i64("num_indices").and_then(|value| i32::try_from(value).ok()),
1805        }
1806    }
1807
1808    /// When transaction_id is not parseable as a version number (i.e. it's a UUID),
1809    /// find_transaction iterates through every version in reverse, reading each
1810    /// transaction file from storage. For tables with many versions this will
1811    /// be extremely slow — each iteration is a separate I/O call.
1812    async fn find_transaction(&self, dataset: &Dataset, id: &str) -> Result<(u64, Transaction)> {
1813        if let Ok(version) = id.parse::<u64>() {
1814            let transaction = dataset
1815                .read_transaction_by_version(version)
1816                .await
1817                .map_err(|e| {
1818                    lance_core::Error::from(NamespaceError::TransactionNotFound {
1819                        message: format!(
1820                            "Failed to read transaction for version {}: {}",
1821                            version, e
1822                        ),
1823                    })
1824                })?
1825                .ok_or_else(|| {
1826                    lance_core::Error::from(NamespaceError::TransactionNotFound {
1827                        message: format!("version {}", version),
1828                    })
1829                })?;
1830            return Ok((version, transaction));
1831        }
1832
1833        let versions = dataset.versions().await.map_err(|e| {
1834            lance_core::Error::from(NamespaceError::Internal {
1835                message: format!(
1836                    "Failed to list table versions while resolving transaction '{}': {}",
1837                    id, e
1838                ),
1839            })
1840        })?;
1841
1842        for version in versions.into_iter().rev() {
1843            if let Some(transaction) = dataset
1844                .read_transaction_by_version(version.version)
1845                .await
1846                .map_err(|e| {
1847                    lance_core::Error::from(NamespaceError::Internal {
1848                        message: format!(
1849                            "Failed to read transaction for version {} while resolving '{}': {}",
1850                            version.version, id, e
1851                        ),
1852                    })
1853                })?
1854                && transaction.uuid == id
1855            {
1856                return Ok((version.version, transaction));
1857            }
1858        }
1859
1860        Err(NamespaceError::TransactionNotFound {
1861            message: id.to_string(),
1862        }
1863        .into())
1864    }
1865
1866    fn table_full_uri(&self, table_name: &str) -> String {
1867        format!("{}/{}.lance", &self.root, table_name)
1868    }
1869
1870    /// Get the object store path for a table (relative to base_path)
1871    fn table_path(&self, table_name: &str) -> Path {
1872        self.base_path
1873            .child(format!("{}.lance", table_name).as_str())
1874    }
1875
1876    /// Get the reserved file path for a table
1877    fn table_reserved_file_path(&self, table_name: &str) -> Path {
1878        self.base_path
1879            .child(format!("{}.lance", table_name).as_str())
1880            .child(".lance-reserved")
1881    }
1882
1883    /// Get the deregistered marker file path for a table
1884    fn table_deregistered_file_path(&self, table_name: &str) -> Path {
1885        self.base_path
1886            .child(format!("{}.lance", table_name).as_str())
1887            .child(".lance-deregistered")
1888    }
1889
1890    /// Atomically check table existence and deregistration status.
1891    ///
1892    /// This performs a single directory listing to get a consistent snapshot of the
1893    /// table's state, avoiding race conditions between checking existence and
1894    /// checking deregistration status.
1895    pub(crate) async fn check_table_status(&self, table_name: &str) -> TableStatus {
1896        let table_path = self.table_path(table_name);
1897        match self.object_store.read_dir(table_path).await {
1898            Ok(entries) => {
1899                let exists = !entries.is_empty();
1900                let is_deregistered = entries.iter().any(|e| e.ends_with(".lance-deregistered"));
1901                let has_reserved_file = entries.iter().any(|e| e.ends_with(".lance-reserved"));
1902                TableStatus {
1903                    exists,
1904                    is_deregistered,
1905                    has_reserved_file,
1906                }
1907            }
1908            Err(_) => TableStatus {
1909                exists: false,
1910                is_deregistered: false,
1911                has_reserved_file: false,
1912            },
1913        }
1914    }
1915
1916    async fn put_marker_file_atomic(
1917        &self,
1918        path: &Path,
1919        file_description: &str,
1920    ) -> std::result::Result<(), String> {
1921        let put_opts = PutOptions {
1922            mode: PutMode::Create,
1923            ..Default::default()
1924        };
1925
1926        match self
1927            .object_store
1928            .inner
1929            .put_opts(path, bytes::Bytes::new().into(), put_opts)
1930            .await
1931        {
1932            Ok(_) => Ok(()),
1933            Err(ObjectStoreError::AlreadyExists { .. })
1934            | Err(ObjectStoreError::Precondition { .. }) => {
1935                Err(format!("{} already exists", file_description))
1936            }
1937            Err(e) => Err(format!("Failed to create {}: {:?}", file_description, e)),
1938        }
1939    }
1940
1941    /// Get storage options for a table, using credential vending if configured.
1942    ///
1943    /// If credential vendor properties are configured and the table location matches
1944    /// a supported cloud provider, this will create an appropriate vendor and vend
1945    /// temporary credentials scoped to the table location. Otherwise, returns the
1946    /// static storage options.
1947    ///
1948    /// The vendor type is auto-selected based on the table URI:
1949    /// - `s3://` locations use AWS STS AssumeRole
1950    /// - `gs://` locations use GCP OAuth2 tokens
1951    /// - `az://` locations use Azure SAS tokens
1952    ///
1953    /// The permission level (Read, Write, Admin) is configured at namespace
1954    /// initialization time via the `credential_vendor_permission` property.
1955    ///
1956    /// # Arguments
1957    ///
1958    /// * `table_uri` - The full URI of the table
1959    /// * `identity` - Optional identity from the request for identity-based credential vending
1960    async fn get_storage_options_for_table(
1961        &self,
1962        table_uri: &str,
1963        vend_credentials: bool,
1964        identity: Option<&Identity>,
1965    ) -> Result<Option<HashMap<String, String>>> {
1966        if vend_credentials && let Some(ref vendor) = self.credential_vendor {
1967            let vended = vendor.vend_credentials(table_uri, identity).await?;
1968            return Ok(Some(vended.storage_options));
1969        }
1970        // When vend_input_storage_options is enabled and no credential vendor is configured,
1971        // return the input storage options. This is useful for testing.
1972        if self.vend_input_storage_options {
1973            let mut options = self.storage_options.clone().unwrap_or_default();
1974            // Add expires_at_millis if refresh interval is configured
1975            if let Some(refresh_interval_millis) =
1976                self.vend_input_storage_options_refresh_interval_millis
1977            {
1978                let now_millis = std::time::SystemTime::now()
1979                    .duration_since(std::time::UNIX_EPOCH)
1980                    .unwrap()
1981                    .as_millis() as u64;
1982                let expires_at_millis = now_millis + refresh_interval_millis;
1983                options.insert(
1984                    "expires_at_millis".to_string(),
1985                    expires_at_millis.to_string(),
1986                );
1987            }
1988            return Ok(Some(options));
1989        }
1990        // When no credential vendor is configured, return None to avoid
1991        // leaking the namespace's own static credentials to clients.
1992        Ok(None)
1993    }
1994
1995    /// Migrate directory-based tables to the manifest.
1996    ///
1997    /// This is a one-time migration operation that:
1998    /// 1. Scans the directory for existing `.lance` tables
1999    /// 2. Registers any unmigrated tables in the manifest
2000    /// 3. Returns the count of tables that were migrated
2001    ///
2002    /// This method is safe to run multiple times - it will skip tables that are already
2003    /// registered in the manifest.
2004    ///
2005    /// # Usage
2006    ///
2007    /// After creating tables in directory-only mode or dual mode, you can migrate them
2008    /// to the manifest to enable manifest-only mode:
2009    ///
2010    /// ```no_run
2011    /// # use lance_namespace_impls::DirectoryNamespaceBuilder;
2012    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
2013    /// // Create namespace with dual mode (manifest + directory listing)
2014    /// let namespace = DirectoryNamespaceBuilder::new("/path/to/data")
2015    ///     .manifest_enabled(true)
2016    ///     .dir_listing_enabled(true)
2017    ///     .build()
2018    ///     .await?;
2019    ///
2020    /// // ... tables are created and used ...
2021    ///
2022    /// // Migrate existing directory tables to manifest
2023    /// let migrated_count = namespace.migrate().await?;
2024    /// println!("Migrated {} tables", migrated_count);
2025    ///
2026    /// // Now you can disable directory listing for better performance:
2027    /// // (requires rebuilding the namespace)
2028    /// let namespace = DirectoryNamespaceBuilder::new("/path/to/data")
2029    ///     .manifest_enabled(true)
2030    ///     .dir_listing_enabled(false)  // All tables now in manifest
2031    ///     .build()
2032    ///     .await?;
2033    /// # Ok(())
2034    /// # }
2035    /// ```
2036    ///
2037    /// # Returns
2038    ///
2039    /// Returns the number of tables that were migrated to the manifest.
2040    ///
2041    /// # Errors
2042    ///
2043    /// Returns an error if:
2044    /// - Manifest is not enabled
2045    /// - Directory listing fails
2046    /// - Manifest registration fails
2047    pub async fn migrate(&self) -> Result<usize> {
2048        // We only care about tables in the root namespace
2049        let Some(ref manifest_ns) = self.manifest_ns else {
2050            return Ok(0); // No manifest, nothing to migrate
2051        };
2052
2053        // Get all table locations already in the manifest
2054        let manifest_locations = manifest_ns.list_manifest_table_locations().await?;
2055
2056        // Get all tables from directory and skip declared-only tables that have not
2057        // written any actual version manifests yet.
2058        let dir_tables = self
2059            .filter_declared_tables(self.list_directory_tables().await?, false)
2060            .await?;
2061
2062        // Register each directory table that doesn't have an overlapping location
2063        // If a directory name already exists in the manifest,
2064        // that means the table must have already been migrated or created
2065        // in the manifest, so we can skip it.
2066        let mut migrated_count = 0;
2067        for table_name in dir_tables {
2068            // For root namespace tables, the directory name is "table_name.lance"
2069            let dir_name = format!("{}.lance", table_name);
2070            if !manifest_locations.contains(&dir_name) {
2071                manifest_ns.register_table(&table_name, dir_name).await?;
2072                migrated_count += 1;
2073            }
2074        }
2075
2076        Ok(migrated_count)
2077    }
2078
2079    /// Delete physical manifest files for the given table version ranges (best-effort).
2080    ///
2081    /// This helper is used by `batch_delete_table_versions` in both the manifest-enabled
2082    /// and non-manifest paths. It resolves each table's storage location, computes the
2083    /// version file paths, and attempts to delete them. Errors are logged (best-effort)
2084    /// when `best_effort` is true, or returned immediately when false.
2085    ///
2086    /// Returns the number of files successfully deleted.
2087    async fn delete_physical_version_files(
2088        &self,
2089        table_entries: &[TableDeleteEntry],
2090        best_effort: bool,
2091    ) -> Result<i64> {
2092        let mut deleted_count = 0i64;
2093        for te in table_entries {
2094            let table_uri = self.resolve_table_location(&te.table_id).await?;
2095            let table_path = self.object_store_path_from_uri(&table_uri)?;
2096            let versions_dir_path = table_path.child(VERSIONS_DIR);
2097
2098            for (start, end) in &te.ranges {
2099                for version in *start..=*end {
2100                    let version_path =
2101                        versions_dir_path.child(format!("{}.manifest", version as u64));
2102                    match self.object_store.inner.delete(&version_path).await {
2103                        Ok(_) => {
2104                            deleted_count += 1;
2105                        }
2106                        Err(object_store::Error::NotFound { .. }) => {}
2107                        Err(e) => {
2108                            if best_effort {
2109                                log::warn!(
2110                                    "Failed to delete manifest file for version {} of table {:?}: {:?}",
2111                                    version,
2112                                    te.table_id,
2113                                    e
2114                                );
2115                            } else {
2116                                return Err(NamespaceError::Internal {
2117                                    message: format!(
2118                                        "Failed to delete version {} for table at '{}': {}",
2119                                        version, table_uri, e
2120                                    ),
2121                                }
2122                                .into());
2123                            }
2124                        }
2125                    }
2126                }
2127            }
2128        }
2129        Ok(deleted_count)
2130    }
2131
2132    /// Apply all query parameters from a `QueryTableRequest`-like source onto a `Scanner`.
2133    ///
2134    /// This covers vector search, filters, column projection, limits, and ANN tuning knobs so
2135    /// that `explain_table_query_plan` / `analyze_table_query_plan` produce an accurate plan.
2136    #[allow(clippy::too_many_arguments)]
2137    fn apply_query_params_to_scanner(
2138        scanner: &mut Scanner,
2139        filter: Option<&str>,
2140        columns: Option<&QueryTableRequestColumns>,
2141        vector_column: Option<&str>,
2142        vector: &QueryTableRequestVector,
2143        k: i32,
2144        offset: Option<i32>,
2145        prefilter: Option<bool>,
2146        bypass_vector_index: Option<bool>,
2147        nprobes: Option<i32>,
2148        ef: Option<i32>,
2149        refine_factor: Option<i32>,
2150        distance_type: Option<&str>,
2151        fast_search_flag: Option<bool>,
2152        with_row_id: Option<bool>,
2153        lower_bound: Option<f32>,
2154        upper_bound: Option<f32>,
2155        operation: &str,
2156    ) -> Result<()> {
2157        // prefilter must be set before nearest() so the fragment-scan guard sees it.
2158        if let Some(pf) = prefilter {
2159            scanner.prefilter(pf);
2160        }
2161
2162        if let Some(filter) = filter {
2163            scanner.filter(filter).map_err(|e| {
2164                Error::invalid_input_source(
2165                    format!("Invalid filter expression for {}: {}", operation, e).into(),
2166                )
2167            })?;
2168        }
2169
2170        if let Some(cols) = columns {
2171            if let Some(ref names) = cols.column_names {
2172                scanner.project(names.as_slice()).map_err(|e| {
2173                    Error::invalid_input_source(
2174                        format!("Invalid column projection for {}: {}", operation, e).into(),
2175                    )
2176                })?;
2177            } else if let Some(ref aliases) = cols.column_aliases {
2178                // aliases maps output_alias -> source_column
2179                let pairs: Vec<(&str, &str)> = aliases
2180                    .iter()
2181                    .map(|(alias, src)| (alias.as_str(), src.as_str()))
2182                    .collect();
2183                scanner.project_with_transform(&pairs).map_err(|e| {
2184                    Error::invalid_input_source(
2185                        format!("Invalid column aliases for {}: {}", operation, e).into(),
2186                    )
2187                })?;
2188            }
2189        }
2190
2191        // Resolve query vector: prefer single_vector, fall back to first row of multi_vector.
2192        let query_vec: Option<Vec<f32>> = vector
2193            .single_vector
2194            .as_ref()
2195            .filter(|v| !v.is_empty())
2196            .cloned()
2197            .or_else(|| {
2198                vector
2199                    .multi_vector
2200                    .as_ref()
2201                    .and_then(|mv| mv.first())
2202                    .filter(|v| !v.is_empty())
2203                    .cloned()
2204            });
2205
2206        if let Some(q_vec) = query_vec {
2207            let col = vector_column.unwrap_or("vector");
2208            let q = Arc::new(Float32Array::from(q_vec));
2209            scanner
2210                .nearest(col, q.as_ref(), k.max(1) as usize)
2211                .map_err(|e| {
2212                    Error::invalid_input_source(
2213                        format!("Invalid vector query for {}: {}", operation, e).into(),
2214                    )
2215                })?;
2216
2217            // ANN parameters — must be applied after nearest().
2218            if let Some(n) = nprobes {
2219                scanner.nprobes(n.max(1) as usize);
2220            }
2221            if let Some(e) = ef {
2222                scanner.ef(e.max(1) as usize);
2223            }
2224            if let Some(rf) = refine_factor {
2225                scanner.refine(rf.max(0) as u32);
2226            }
2227            // bypass_vector_index and fast_search are mutually exclusive; apply in order.
2228            if let Some(true) = bypass_vector_index {
2229                scanner.use_index(false);
2230            }
2231            if let Some(true) = fast_search_flag {
2232                scanner.fast_search();
2233            }
2234            if lower_bound.is_some() || upper_bound.is_some() {
2235                scanner.distance_range(lower_bound, upper_bound);
2236            }
2237            if let Some(dt) = distance_type {
2238                let metric = Self::parse_metric_type(Some(dt))?;
2239                scanner.distance_metric(metric);
2240            }
2241            // Apply offset on top of the k nearest results.
2242            if let Some(off) = offset.filter(|&o| o > 0) {
2243                scanner.limit(None, Some(off as i64)).map_err(|e| {
2244                    Error::invalid_input_source(
2245                        format!("Invalid offset for {}: {}", operation, e).into(),
2246                    )
2247                })?;
2248            }
2249        } else {
2250            // Scalar (non-vector) query: treat k as a row LIMIT.
2251            let limit = if k > 0 { Some(k as i64) } else { None };
2252            scanner
2253                .limit(limit, offset.map(|o| o as i64))
2254                .map_err(|e| {
2255                    Error::invalid_input_source(
2256                        format!("Invalid limit/offset for {}: {}", operation, e).into(),
2257                    )
2258                })?;
2259        }
2260
2261        if let Some(true) = with_row_id {
2262            scanner.with_row_id();
2263        }
2264
2265        Ok(())
2266    }
2267
2268    /// Retrieve a snapshot of operation metrics.
2269    ///
2270    /// Returns a HashMap where keys are operation names (e.g., "list_tables", "describe_table")
2271    /// and values are the number of times each operation was called.
2272    ///
2273    /// Returns an empty HashMap if `ops_metrics_enabled` was false when building the namespace.
2274    pub fn retrieve_ops_metrics(&self) -> HashMap<String, u64> {
2275        self.ops_metrics
2276            .as_ref()
2277            .map(|m| m.retrieve())
2278            .unwrap_or_default()
2279    }
2280
2281    /// Reset all operation metrics counters to zero.
2282    ///
2283    /// Does nothing if `ops_metrics_enabled` was false when building the namespace.
2284    pub fn reset_ops_metrics(&self) {
2285        if let Some(ref metrics) = self.ops_metrics {
2286            metrics.reset();
2287        }
2288    }
2289
2290    /// Increment the counter for an operation.
2291    fn record_op(&self, operation: &str) {
2292        if let Some(ref metrics) = self.ops_metrics {
2293            metrics.increment(operation);
2294        }
2295    }
2296}
2297
2298#[async_trait]
2299impl LanceNamespace for DirectoryNamespace {
2300    async fn list_namespaces(
2301        &self,
2302        request: ListNamespacesRequest,
2303    ) -> Result<ListNamespacesResponse> {
2304        self.record_op("list_namespaces");
2305        if let Some(ref manifest_ns) = self.manifest_ns {
2306            return manifest_ns.list_namespaces(request).await;
2307        }
2308
2309        Self::validate_root_namespace_id(&request.id)?;
2310        Ok(ListNamespacesResponse::new(vec![]))
2311    }
2312
2313    async fn describe_namespace(
2314        &self,
2315        request: DescribeNamespaceRequest,
2316    ) -> Result<DescribeNamespaceResponse> {
2317        self.record_op("describe_namespace");
2318        if let Some(ref manifest_ns) = self.manifest_ns {
2319            return manifest_ns.describe_namespace(request).await;
2320        }
2321
2322        Self::validate_root_namespace_id(&request.id)?;
2323        #[allow(clippy::needless_update)]
2324        Ok(DescribeNamespaceResponse {
2325            properties: Some(HashMap::new()),
2326            ..Default::default()
2327        })
2328    }
2329
2330    async fn create_namespace(
2331        &self,
2332        request: CreateNamespaceRequest,
2333    ) -> Result<CreateNamespaceResponse> {
2334        self.record_op("create_namespace");
2335        if let Some(ref manifest_ns) = self.manifest_ns {
2336            return manifest_ns.create_namespace(request).await;
2337        }
2338
2339        if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2340            return Err(NamespaceError::NamespaceAlreadyExists {
2341                message: "root namespace".to_string(),
2342            }
2343            .into());
2344        }
2345
2346        Err(NamespaceError::Unsupported {
2347            message: "Child namespaces are only supported when manifest mode is enabled"
2348                .to_string(),
2349        }
2350        .into())
2351    }
2352
2353    async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
2354        self.record_op("drop_namespace");
2355        if let Some(ref manifest_ns) = self.manifest_ns {
2356            return manifest_ns.drop_namespace(request).await;
2357        }
2358
2359        if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2360            return Err(NamespaceError::InvalidInput {
2361                message: "Root namespace cannot be dropped".to_string(),
2362            }
2363            .into());
2364        }
2365
2366        Err(NamespaceError::Unsupported {
2367            message: "Child namespaces are only supported when manifest mode is enabled"
2368                .to_string(),
2369        }
2370        .into())
2371    }
2372
2373    async fn namespace_exists(&self, request: NamespaceExistsRequest) -> Result<()> {
2374        self.record_op("namespace_exists");
2375        if let Some(ref manifest_ns) = self.manifest_ns {
2376            return manifest_ns.namespace_exists(request).await;
2377        }
2378
2379        if request.id.is_none() || request.id.as_ref().unwrap().is_empty() {
2380            return Ok(());
2381        }
2382
2383        Err(NamespaceError::NamespaceNotFound {
2384            message: "Child namespaces are only supported when manifest mode is enabled"
2385                .to_string(),
2386        }
2387        .into())
2388    }
2389
2390    async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
2391        self.record_op("list_tables");
2392        // Validate that namespace ID is provided
2393        let namespace_id = request.id.as_ref().ok_or_else(|| {
2394            lance_core::Error::from(NamespaceError::InvalidInput {
2395                message: "Namespace ID is required".to_string(),
2396            })
2397        })?;
2398
2399        // For child namespaces, always delegate to manifest (if enabled)
2400        if !namespace_id.is_empty() {
2401            if let Some(ref manifest_ns) = self.manifest_ns {
2402                return manifest_ns.list_tables(request).await;
2403            }
2404            return Err(NamespaceError::Unsupported {
2405                message: "Child namespaces are only supported when manifest mode is enabled"
2406                    .to_string(),
2407            }
2408            .into());
2409        }
2410
2411        // When only manifest is enabled (no directory listing), delegate directly to manifest
2412        if let Some(ref manifest_ns) = self.manifest_ns
2413            && !self.dir_listing_enabled
2414        {
2415            return manifest_ns.list_tables(request).await;
2416        }
2417
2418        // When both manifest and directory listing are enabled with migration mode,
2419        // we need to merge and deduplicate
2420        let mut tables = if self.manifest_ns.is_some()
2421            && self.dir_listing_enabled
2422            && self.dir_listing_to_manifest_migration_enabled
2423        {
2424            // Get all manifest table locations (for deduplication)
2425            let manifest_locations = if let Some(ref manifest_ns) = self.manifest_ns {
2426                manifest_ns.list_manifest_table_locations().await?
2427            } else {
2428                std::collections::HashSet::new()
2429            };
2430
2431            // Get all manifest tables (without pagination for merging)
2432            let mut manifest_request = request.clone();
2433            manifest_request.limit = None;
2434            manifest_request.page_token = None;
2435            let manifest_tables = if let Some(ref manifest_ns) = self.manifest_ns {
2436                let manifest_response = manifest_ns.list_tables(manifest_request).await?;
2437                manifest_response.tables
2438            } else {
2439                vec![]
2440            };
2441
2442            // Start with all manifest table names
2443            // Add directory tables that aren't already in the manifest (by location)
2444            let mut all_tables: Vec<String> = manifest_tables;
2445            let dir_tables = self.list_directory_tables().await?;
2446            for table_name in dir_tables {
2447                // Check if this table's location is already in the manifest
2448                // Manifest stores full URIs, so we need to check both formats
2449                let full_location = format!("{}/{}.lance", self.root, table_name);
2450                let relative_location = format!("{}.lance", table_name);
2451                if !manifest_locations.contains(&full_location)
2452                    && !manifest_locations.contains(&relative_location)
2453                {
2454                    all_tables.push(table_name);
2455                }
2456            }
2457
2458            all_tables
2459        } else {
2460            self.list_directory_tables().await?
2461        };
2462
2463        tables = self
2464            .filter_declared_tables(tables, request.include_declared.unwrap_or(true))
2465            .await?;
2466
2467        // Apply sorting and pagination
2468        let next_page_token =
2469            Self::apply_pagination(&mut tables, request.page_token, request.limit);
2470        let mut response = ListTablesResponse::new(tables);
2471        response.page_token = next_page_token;
2472        Ok(response)
2473    }
2474
    /// Describe a table.
    ///
    /// Records the call in the operation metrics, then delegates to
    /// `describe_table_impl` and returns its result unchanged.
    async fn describe_table(&self, request: DescribeTableRequest) -> Result<DescribeTableResponse> {
        self.record_op("describe_table");
        self.describe_table_impl(request).await
    }
2479
2480    async fn table_exists(&self, request: TableExistsRequest) -> Result<()> {
2481        self.record_op("table_exists");
2482        let is_root_level = request.id.as_ref().is_some_and(|id| id.len() == 1);
2483        let skip_manifest_for_root = self.dir_listing_enabled
2484            && is_root_level
2485            && !self.dir_listing_to_manifest_migration_enabled;
2486        if let Some(ref manifest_ns) = self.manifest_ns
2487            && !skip_manifest_for_root
2488        {
2489            match manifest_ns.table_exists(request.clone()).await {
2490                Ok(()) => return Ok(()),
2491                Err(_) if self.dir_listing_enabled && is_root_level => {
2492                    // Fall through to directory check only for single-level IDs
2493                }
2494                Err(e) => return Err(e),
2495            }
2496        }
2497
2498        let table_name = Self::table_name_from_id(&request.id)?;
2499        let table_id = Self::format_table_id_from_request(&request.id);
2500
2501        // Atomically check table existence and deregistration status
2502        let status = self.check_table_status(&table_name).await;
2503
2504        if !status.exists {
2505            return Err(NamespaceError::TableNotFound {
2506                message: table_id.clone(),
2507            }
2508            .into());
2509        }
2510
2511        if status.is_deregistered {
2512            return Err(NamespaceError::TableNotFound {
2513                message: format!("Table is deregistered: {}", table_id),
2514            }
2515            .into());
2516        }
2517
2518        Ok(())
2519    }
2520
2521    async fn drop_table(&self, request: DropTableRequest) -> Result<DropTableResponse> {
2522        self.record_op("drop_table");
2523        if let Some(ref manifest_ns) = self.manifest_ns {
2524            return manifest_ns.drop_table(request).await;
2525        }
2526
2527        let table_name = Self::table_name_from_id(&request.id)?;
2528        let table_uri = self.table_full_uri(&table_name);
2529        let table_path = self.table_path(&table_name);
2530
2531        self.object_store
2532            .remove_dir_all(table_path)
2533            .await
2534            .map_err(|e| {
2535                lance_core::Error::from(NamespaceError::Internal {
2536                    message: format!("Failed to drop table {}: {:?}", table_name, e),
2537                })
2538            })?;
2539
2540        Ok(DropTableResponse {
2541            id: request.id,
2542            location: Some(table_uri),
2543            ..Default::default()
2544        })
2545    }
2546
    /// Create a table from Arrow IPC data.
    ///
    /// When a manifest namespace is configured the request and payload are forwarded to
    /// it unchanged. Otherwise the payload is written as a new dataset at the table's
    /// URI using `WriteMode::Create`.
    ///
    /// The response reports version 1, the table URI, the namespace-level
    /// `storage_options` (not the ones from the request) and echoes the request
    /// properties.
    ///
    /// # Errors
    ///
    /// * Whatever `validate_dir_only_properties`, `table_name_from_id` or
    ///   `ipc_reader_from_request_data` report for invalid input.
    /// * `TableAlreadyExists` when the table already has manifests, either before the
    ///   write or when re-checked after a failed write.
    /// * The original write error in every other failure case.
    async fn create_table(
        &self,
        request: CreateTableRequest,
        request_data: Bytes,
    ) -> Result<CreateTableResponse> {
        self.record_op("create_table");
        if let Some(ref manifest_ns) = self.manifest_ns {
            return manifest_ns.create_table(request, request_data).await;
        }

        Self::validate_dir_only_properties(request.properties.as_ref(), "create_table")?;

        let table_name = Self::table_name_from_id(&request.id)?;
        let table_uri = self.table_full_uri(&table_name);
        let status = self.check_table_status(&table_name).await;
        // The payload is parsed before the existence check, so malformed data is
        // reported even for an existing table. The row count is not needed here.
        let (reader, _num_rows) =
            Self::ipc_reader_from_request_data(&request_data, "create_table")?;

        // An existing table only counts as a conflict when it has actual manifests.
        if status.exists && self.table_has_actual_manifests(&table_name).await? {
            return Err(NamespaceError::TableAlreadyExists {
                message: table_name,
            }
            .into());
        }

        let write_result = self
            .write_reader_to_table(
                &table_uri,
                reader,
                WriteMode::Create,
                request.storage_options.clone(),
            )
            .await;
        if let Err(err) = write_result {
            // If manifests exist after the failed write, report a conflict instead of
            // the raw write error (presumably a concurrent creator won the race —
            // TODO confirm).
            if self.table_uri_has_actual_manifests(&table_uri).await? {
                return Err(NamespaceError::TableAlreadyExists {
                    message: table_name,
                }
                .into());
            }
            return Err(err);
        }
        Ok(CreateTableResponse {
            version: Some(1),
            location: Some(table_uri),
            storage_options: self.storage_options.clone(),
            properties: request.properties,
            ..Default::default()
        })
    }
2597
    /// Declare (reserve) a table without writing any data.
    ///
    /// With a manifest namespace the declaration is delegated to it; the response's
    /// storage options are then filled in via `get_storage_options_for_table` when a
    /// location is present, and `managed_versioning` is set when table version tracking
    /// is enabled.
    ///
    /// Without a manifest namespace the table is marked as declared by atomically
    /// creating its reserved marker file.
    ///
    /// # Errors
    ///
    /// In directory-only mode:
    /// * `InvalidInput` when `request.location` (ignoring trailing slashes) differs from
    ///   the table's URI under this namespace.
    /// * `TableAlreadyExists` when the table exists without a reserved file, or when the
    ///   marker file already exists.
    /// * `Internal` for any other failure to create the marker file.
    async fn declare_table(&self, request: DeclareTableRequest) -> Result<DeclareTableResponse> {
        self.record_op("declare_table");
        if let Some(ref manifest_ns) = self.manifest_ns {
            // The request is cloned because `vend_credentials` and `identity` are still
            // read from it after the delegated call.
            let mut response = manifest_ns.declare_table(request.clone()).await?;
            if let Some(ref location) = response.location {
                // For backwards compatibility, only skip vending credentials when explicitly set to false
                let vend = request.vend_credentials.unwrap_or(true);
                let identity = request.identity.as_deref();
                response.storage_options = self
                    .get_storage_options_for_table(location, vend, identity)
                    .await?;
            }
            // Set managed_versioning when table_version_tracking_enabled
            if self.table_version_tracking_enabled {
                response.managed_versioning = Some(true);
            }
            return Ok(response);
        }

        Self::validate_dir_only_properties(request.properties.as_ref(), "declare_table")?;

        let table_name = Self::table_name_from_id(&request.id)?;
        let table_uri = self.table_full_uri(&table_name);

        // Validate location if provided
        if let Some(location) = &request.location {
            // Trailing slashes are ignored when comparing against the expected URI.
            let location = location.trim_end_matches('/');
            if location != table_uri {
                return Err(NamespaceError::InvalidInput {
                    message: format!(
                        "Cannot declare table {} at location {}, must be at location {}",
                        table_name, location, table_uri
                    ),
                }
                .into());
            }
        }

        // Check if table already has data (created via create_table).
        // The atomic put only prevents races between concurrent declare_table calls,
        // not between declare_table and existing data.
        let status = self.check_table_status(&table_name).await;
        if status.exists && !status.has_reserved_file {
            // Table has data but no reserved file - it was created with data
            return Err(NamespaceError::TableAlreadyExists {
                message: table_name.to_string(),
            }
            .into());
        }

        // Atomically create the .lance-reserved file to mark the table as declared.
        // This uses put_if_not_exists semantics to avoid race conditions between
        // concurrent declare_table calls.
        let reserved_file_path = self.table_reserved_file_path(&table_name);

        self.put_marker_file_atomic(&reserved_file_path, &format!("table {}", table_name))
            .await
            .map_err(|e| {
                // The helper reports failures as text; an "already exists" message
                // means another declaration got there first.
                if e.contains("already exists") {
                    lance_core::Error::from(NamespaceError::TableAlreadyExists {
                        message: table_name.to_string(),
                    })
                } else {
                    lance_core::Error::from(NamespaceError::Internal { message: e })
                }
            })?;

        // For backwards compatibility, only skip vending credentials when explicitly set to false
        let vend_credentials = request.vend_credentials.unwrap_or(true);
        let identity = request.identity.as_deref();
        let storage_options = self
            .get_storage_options_for_table(&table_uri, vend_credentials, identity)
            .await?;

        Ok(DeclareTableResponse {
            location: Some(table_uri),
            storage_options,
            properties: request.properties,
            managed_versioning: if self.table_version_tracking_enabled {
                Some(true)
            } else {
                None
            },
            ..Default::default()
        })
    }
2684
2685    async fn register_table(
2686        &self,
2687        request: lance_namespace::models::RegisterTableRequest,
2688    ) -> Result<lance_namespace::models::RegisterTableResponse> {
2689        self.record_op("register_table");
2690        // If manifest is enabled, delegate to manifest namespace
2691        if let Some(ref manifest_ns) = self.manifest_ns {
2692            return LanceNamespace::register_table(manifest_ns.as_ref(), request).await;
2693        }
2694
2695        // Without manifest, register_table is not supported
2696        Err(NamespaceError::Unsupported {
2697            message: "register_table is only supported when manifest mode is enabled".to_string(),
2698        }
2699        .into())
2700    }
2701
2702    async fn deregister_table(
2703        &self,
2704        request: lance_namespace::models::DeregisterTableRequest,
2705    ) -> Result<lance_namespace::models::DeregisterTableResponse> {
2706        self.record_op("deregister_table");
2707        // If manifest is enabled, delegate to manifest namespace
2708        if let Some(ref manifest_ns) = self.manifest_ns {
2709            return LanceNamespace::deregister_table(manifest_ns.as_ref(), request).await;
2710        }
2711
2712        // V1 mode: create a .lance-deregistered marker file in the table directory
2713        let table_name = Self::table_name_from_id(&request.id)?;
2714        let table_uri = self.table_full_uri(&table_name);
2715
2716        // Check table existence and deregistration status.
2717        // This provides better error messages for common cases.
2718        let status = self.check_table_status(&table_name).await;
2719
2720        if !status.exists {
2721            return Err(NamespaceError::TableNotFound {
2722                message: table_name.to_string(),
2723            }
2724            .into());
2725        }
2726
2727        if status.is_deregistered {
2728            return Err(NamespaceError::TableNotFound {
2729                message: format!("Table is already deregistered: {}", table_name),
2730            }
2731            .into());
2732        }
2733
2734        // Atomically create the .lance-deregistered marker file.
2735        // This uses put_if_not_exists semantics to prevent race conditions
2736        // when multiple processes try to deregister the same table concurrently.
2737        // If a race occurs and another process already created the file,
2738        // we'll get an AlreadyExists error which we convert to a proper message.
2739        let deregistered_path = self.table_deregistered_file_path(&table_name);
2740        self.put_marker_file_atomic(
2741            &deregistered_path,
2742            &format!("deregistration marker for table {}", table_name),
2743        )
2744        .await
2745        .map_err(|e| {
2746            if e.contains("already exists") {
2747                lance_core::Error::from(NamespaceError::InvalidTableState {
2748                    message: format!("Table is already deregistered: {}", table_name),
2749                })
2750            } else {
2751                lance_core::Error::from(NamespaceError::Internal { message: e })
2752            }
2753        })?;
2754
2755        Ok(lance_namespace::models::DeregisterTableResponse {
2756            id: request.id,
2757            location: Some(table_uri),
2758            ..Default::default()
2759        })
2760    }
2761
2762    async fn list_table_versions(
2763        &self,
2764        request: ListTableVersionsRequest,
2765    ) -> Result<ListTableVersionsResponse> {
2766        self.record_op("list_table_versions");
2767        // When table_version_storage_enabled, query from __manifest
2768        if self.table_version_storage_enabled
2769            && let Some(ref manifest_ns) = self.manifest_ns
2770        {
2771            let table_id = request.id.clone().unwrap_or_default();
2772            let want_descending = request.descending == Some(true);
2773            return manifest_ns
2774                .list_table_versions(&table_id, want_descending, request.limit)
2775                .await;
2776        }
2777
2778        // Fallback when table_version_storage is not enabled: list from _versions/ directory
2779        let table_uri = self.resolve_table_location(&request.id).await?;
2780        let want_descending = request.descending == Some(true);
2781        let table_versions = self
2782            .list_table_versions_from_storage(&table_uri, want_descending, request.limit)
2783            .await?;
2784
2785        Ok(ListTableVersionsResponse {
2786            versions: table_versions,
2787            page_token: None,
2788        })
2789    }
2790
2791    async fn create_table_version(
2792        &self,
2793        request: CreateTableVersionRequest,
2794    ) -> Result<CreateTableVersionResponse> {
2795        self.record_op("create_table_version");
2796        let table_uri = self.resolve_table_location(&request.id).await?;
2797
2798        let staging_manifest_path = &request.manifest_path;
2799        let version = request.version as u64;
2800
2801        let table_path = self.object_store_path_from_uri(&table_uri)?;
2802
2803        // Determine naming scheme from request, default to V2
2804        let naming_scheme = match request.naming_scheme.as_deref() {
2805            Some("V1") => ManifestNamingScheme::V1,
2806            _ => ManifestNamingScheme::V2,
2807        };
2808
2809        // Compute final path using the naming scheme
2810        let final_path = naming_scheme.manifest_path(&table_path, version);
2811
2812        let staging_path = Path::parse(staging_manifest_path).map_err(|e| {
2813            lance_core::Error::from(NamespaceError::InvalidInput {
2814                message: format!(
2815                    "Invalid staging manifest path '{}': {}",
2816                    staging_manifest_path, e
2817                ),
2818            })
2819        })?;
2820
2821        let copy_result = match self
2822            .object_store
2823            .inner
2824            .copy_if_not_exists(&staging_path, &final_path)
2825            .await
2826        {
2827            Ok(()) => Ok(()),
2828            Err(ObjectStoreError::NotImplemented) | Err(ObjectStoreError::NotSupported { .. }) => {
2829                let manifest_data = self
2830                    .object_store
2831                    .inner
2832                    .get(&staging_path)
2833                    .await
2834                    .map_err(|e| {
2835                        lance_core::Error::from(NamespaceError::Internal {
2836                            message: format!(
2837                                "Failed to read staging manifest at '{}': {}",
2838                                staging_manifest_path, e
2839                            ),
2840                        })
2841                    })?
2842                    .bytes()
2843                    .await
2844                    .map_err(|e| {
2845                        lance_core::Error::from(NamespaceError::Internal {
2846                            message: format!(
2847                                "Failed to read staging manifest bytes at '{}': {}",
2848                                staging_manifest_path, e
2849                            ),
2850                        })
2851                    })?;
2852                self.object_store
2853                    .inner
2854                    .put_opts(
2855                        &final_path,
2856                        manifest_data.into(),
2857                        PutOptions {
2858                            mode: PutMode::Create,
2859                            ..Default::default()
2860                        },
2861                    )
2862                    .await
2863                    .map(|_| ())
2864            }
2865            Err(e) => Err(e),
2866        };
2867
2868        match copy_result {
2869            Ok(()) => {}
2870            Err(ObjectStoreError::AlreadyExists { .. })
2871            | Err(ObjectStoreError::Precondition { .. }) => {
2872                return Err(lance_core::Error::from(
2873                    NamespaceError::ConcurrentModification {
2874                        message: format!(
2875                            "Version {} already exists for table at '{}'",
2876                            version, table_uri
2877                        ),
2878                    },
2879                ));
2880            }
2881            Err(e) => {
2882                return Err(lance_core::Error::from(NamespaceError::Internal {
2883                    message: format!(
2884                        "Failed to create version {} for table at '{}': {}",
2885                        version, table_uri, e
2886                    ),
2887                }));
2888            }
2889        }
2890
2891        let final_meta = self
2892            .object_store
2893            .inner
2894            .head(&final_path)
2895            .await
2896            .map_err(|e| {
2897                lance_core::Error::from(NamespaceError::Internal {
2898                    message: format!(
2899                        "Failed to stat created version {} for table at '{}': {}",
2900                        version, table_uri, e
2901                    ),
2902                })
2903            })?;
2904        let manifest_size = final_meta.size as i64;
2905
2906        // Delete the staging manifest after successful copy
2907        if let Err(e) = self.object_store.inner.delete(&staging_path).await {
2908            log::warn!(
2909                "Failed to delete staging manifest at '{}': {:?}",
2910                staging_path,
2911                e
2912            );
2913        }
2914
2915        // If table_version_storage_enabled is enabled, also record in __manifest (best-effort)
2916        if self.table_version_storage_enabled
2917            && let Some(ref manifest_ns) = self.manifest_ns
2918        {
2919            let table_id_str =
2920                manifest::ManifestNamespace::str_object_id(&request.id.clone().unwrap_or_default());
2921            let object_id =
2922                manifest::ManifestNamespace::build_version_object_id(&table_id_str, version as i64);
2923            let metadata_json = serde_json::json!({
2924                "manifest_path": final_path.to_string(),
2925                "manifest_size": manifest_size,
2926                "e_tag": final_meta.e_tag,
2927                "naming_scheme": request.naming_scheme.as_deref().unwrap_or("V2"),
2928            })
2929            .to_string();
2930
2931            if let Err(e) = manifest_ns
2932                .insert_into_manifest_with_metadata(
2933                    vec![manifest::ManifestEntry {
2934                        object_id,
2935                        object_type: manifest::ObjectType::TableVersion,
2936                        location: None,
2937                        metadata: Some(metadata_json),
2938                    }],
2939                    None,
2940                )
2941                .await
2942            {
2943                log::warn!(
2944                    "Failed to record table version in __manifest (best-effort): {:?}",
2945                    e
2946                );
2947            }
2948        }
2949
2950        Ok(CreateTableVersionResponse {
2951            transaction_id: None,
2952            version: Some(Box::new(TableVersion {
2953                version: version as i64,
2954                manifest_path: final_path.to_string(),
2955                manifest_size: Some(manifest_size),
2956                e_tag: final_meta.e_tag,
2957                timestamp_millis: None,
2958                metadata: None,
2959            })),
2960        })
2961    }
2962
2963    async fn describe_table_version(
2964        &self,
2965        request: DescribeTableVersionRequest,
2966    ) -> Result<DescribeTableVersionResponse> {
2967        self.record_op("describe_table_version");
2968        // When table_version_storage_enabled and a specific version is requested,
2969        // query from __manifest to avoid opening the entire dataset
2970        if self.table_version_storage_enabled
2971            && let (Some(manifest_ns), Some(version)) = (&self.manifest_ns, request.version)
2972        {
2973            let table_id = request.id.clone().unwrap_or_default();
2974            return manifest_ns.describe_table_version(&table_id, version).await;
2975        }
2976
2977        // Fallback when table_version_storage is not enabled: inspect physical manifests directly.
2978        let table_uri = self.resolve_table_location(&request.id).await?;
2979        let versions = self
2980            .list_table_versions_from_storage(&table_uri, true, None)
2981            .await?;
2982        let table_version = if let Some(requested_version) = request.version {
2983            versions
2984                .into_iter()
2985                .find(|version| version.version == requested_version)
2986                .ok_or_else(|| {
2987                    lance_core::Error::from(NamespaceError::TableVersionNotFound {
2988                        message: format!(
2989                            "version {} for table {}",
2990                            requested_version,
2991                            Self::format_table_id_from_request(&request.id)
2992                        ),
2993                    })
2994                })?
2995        } else {
2996            versions.into_iter().next().ok_or_else(|| {
2997                lance_core::Error::from(NamespaceError::TableVersionNotFound {
2998                    message: format!(
2999                        "latest version for table {}",
3000                        Self::format_table_id_from_request(&request.id)
3001                    ),
3002                })
3003            })?
3004        };
3005
3006        Ok(DescribeTableVersionResponse {
3007            version: Box::new(table_version),
3008        })
3009    }
3010
3011    async fn batch_delete_table_versions(
3012        &self,
3013        request: BatchDeleteTableVersionsRequest,
3014    ) -> Result<BatchDeleteTableVersionsResponse> {
3015        self.record_op("batch_delete_table_versions");
3016        // Single-table mode: use `id` (from path parameter) + `ranges` to delete
3017        // versions from one table.
3018        let ranges: Vec<(i64, i64)> = request
3019            .ranges
3020            .iter()
3021            .map(|r| {
3022                let start = r.start_version;
3023                let end = if r.end_version > 0 {
3024                    r.end_version
3025                } else {
3026                    start
3027                };
3028                (start, end)
3029            })
3030            .collect();
3031        let table_entries = vec![TableDeleteEntry {
3032            table_id: request.id.clone(),
3033            ranges,
3034        }];
3035
3036        let mut total_deleted_count = 0i64;
3037
3038        if self.table_version_storage_enabled
3039            && let Some(ref manifest_ns) = self.manifest_ns
3040        {
3041            // Phase 1 (atomic commit point): Delete version records from __manifest
3042            // for ALL tables in a single atomic operation. This is the authoritative
3043            // source of truth — once __manifest entries are removed, the versions
3044            // are logically deleted across all tables atomically.
3045
3046            // Collect all (table_id_str, ranges) for batch deletion
3047            let mut all_object_ids: Vec<String> = Vec::new();
3048            for te in &table_entries {
3049                let table_id_str = manifest::ManifestNamespace::str_object_id(
3050                    &te.table_id.clone().unwrap_or_default(),
3051                );
3052                for (start, end) in &te.ranges {
3053                    for version in *start..=*end {
3054                        let object_id = manifest::ManifestNamespace::build_version_object_id(
3055                            &table_id_str,
3056                            version,
3057                        );
3058                        all_object_ids.push(object_id);
3059                    }
3060                }
3061            }
3062
3063            if !all_object_ids.is_empty() {
3064                total_deleted_count = manifest_ns
3065                    .batch_delete_table_versions_by_object_ids(&all_object_ids)
3066                    .await?;
3067            }
3068
3069            // Phase 2: Delete physical manifest files (best-effort).
3070            // Even if some file deletions fail, the versions are already removed from
3071            // __manifest, so they won't be visible to readers. Leftover files are
3072            // orphaned but harmless and can be cleaned up later.
3073            let _ = self
3074                .delete_physical_version_files(&table_entries, true)
3075                .await;
3076
3077            return Ok(BatchDeleteTableVersionsResponse {
3078                deleted_count: Some(total_deleted_count),
3079                transaction_id: None,
3080            });
3081        }
3082
3083        // Fallback when table_version_storage is not enabled: delete physical files directly (no __manifest)
3084        total_deleted_count = self
3085            .delete_physical_version_files(&table_entries, false)
3086            .await?;
3087
3088        Ok(BatchDeleteTableVersionsResponse {
3089            deleted_count: Some(total_deleted_count),
3090            transaction_id: None,
3091        })
3092    }
3093
3094    async fn create_table_index(
3095        &self,
3096        request: CreateTableIndexRequest,
3097    ) -> Result<CreateTableIndexResponse> {
3098        self.record_op("create_table_index");
3099        let table_uri = self.resolve_table_location(&request.id).await?;
3100        let mut dataset = self
3101            .load_dataset(&table_uri, None, "create_table_index")
3102            .await?;
3103        let index_request = Self::build_index_params(&request)?;
3104
3105        dataset
3106            .create_index(
3107                &[request.column.as_str()],
3108                index_request.index_type(),
3109                request.name.clone(),
3110                index_request.params(),
3111                false,
3112            )
3113            .await
3114            .map_err(|e| {
3115                let err_msg = format!("{}", e);
3116                let ns_err = if err_msg.contains("already exists") {
3117                    NamespaceError::TableIndexAlreadyExists {
3118                        message: format!(
3119                            "Index '{}' already exists on table '{}': {:?}",
3120                            request.name.as_deref().unwrap_or("<auto-generated>"),
3121                            table_uri,
3122                            e
3123                        ),
3124                    }
3125                } else if err_msg.contains("not found") || err_msg.contains("does not exist") {
3126                    NamespaceError::TableColumnNotFound {
3127                        message: format!(
3128                            "Column '{}' not found for table '{}': {:?}",
3129                            request.column, table_uri, e
3130                        ),
3131                    }
3132                } else {
3133                    NamespaceError::Internal {
3134                        message: format!(
3135                            "Failed to create {} index '{}' on column '{}' for table '{}': {:?}",
3136                            request.index_type,
3137                            request.name.as_deref().unwrap_or("<auto-generated>"),
3138                            request.column,
3139                            table_uri,
3140                            e
3141                        ),
3142                    }
3143                };
3144                lance_core::Error::from(ns_err)
3145            })?;
3146
3147        let transaction_id = dataset
3148            .read_transaction()
3149            .await
3150            .map_err(|e| {
3151                lance_core::Error::from(NamespaceError::Internal {
3152                    message: format!(
3153                        "Failed to read committed transaction after creating index on '{}': {}",
3154                        table_uri, e
3155                    ),
3156                })
3157            })?
3158            .map(|transaction| transaction.uuid);
3159
3160        Ok(CreateTableIndexResponse { transaction_id })
3161    }
3162
3163    async fn list_table_indices(
3164        &self,
3165        request: ListTableIndicesRequest,
3166    ) -> Result<ListTableIndicesResponse> {
3167        self.record_op("list_table_indices");
3168        let table_uri = self.resolve_table_location(&request.id).await?;
3169        let dataset = self
3170            .load_dataset(&table_uri, request.version, "list_table_indices")
3171            .await?;
3172        let mut indices = dataset
3173            .describe_indices(None)
3174            .await
3175            .map_err(|e| {
3176                lance_core::Error::from(NamespaceError::Internal {
3177                    message: format!("Failed to describe table indices for '{}': {:?}", table_uri, e),
3178                })
3179            })?
3180            .into_iter()
3181            .filter(|description| {
3182                description
3183                    .metadata()
3184                    .first()
3185                    .map(|metadata| !is_system_index(metadata))
3186                    .unwrap_or(false)
3187            })
3188            .map(|description| {
3189                let columns = description
3190                    .field_ids()
3191                    .iter()
3192                        .map(|field_id| {
3193                        dataset
3194                            .schema()
3195                            .field_path(i32::try_from(*field_id).map_err(|e| {
3196                                lance_core::Error::from(NamespaceError::Internal {
3197                                    message: format!(
3198                                        "Field id {} does not fit in i32 for table '{}': {}",
3199                                        field_id, table_uri, e
3200                                    ),
3201                                })
3202                            })?)
3203                            .map_err(|e| {
3204                            lance_core::Error::from(NamespaceError::Internal {
3205                                message: format!(
3206                                    "Failed to resolve field path for field_id {} in table '{}': {}",
3207                                    field_id, table_uri, e
3208                                ),
3209                            })
3210                        })
3211                    })
3212                    .collect::<Result<Vec<_>>>()?;
3213
3214                Ok(IndexContent {
3215                    index_name: description.name().to_string(),
3216                    index_uuid: description.metadata()[0].uuid.to_string(),
3217                    columns,
3218                    status: "SUCCEEDED".to_string(),
3219                })
3220            })
3221            .collect::<Result<Vec<_>>>()?;
3222
3223        let page_token = Self::paginate_indices(&mut indices, request.page_token, request.limit);
3224        Ok(ListTableIndicesResponse {
3225            indexes: indices,
3226            page_token,
3227        })
3228    }
3229
3230    async fn describe_table_index_stats(
3231        &self,
3232        request: DescribeTableIndexStatsRequest,
3233    ) -> Result<DescribeTableIndexStatsResponse> {
3234        self.record_op("describe_table_index_stats");
3235        let table_uri = self.resolve_table_location(&request.id).await?;
3236        let dataset = self
3237            .load_dataset(&table_uri, request.version, "describe_table_index_stats")
3238            .await?;
3239        let index_name = request.index_name.as_deref().ok_or_else(|| {
3240            lance_core::Error::from(NamespaceError::InvalidInput {
3241                message: "Index name is required for describe_table_index_stats".to_string(),
3242            })
3243        })?;
3244        let metadatas = dataset
3245            .load_indices_by_name(index_name)
3246            .await
3247            .map_err(|e| {
3248                lance_core::Error::from(NamespaceError::TableIndexNotFound {
3249                    message: format!(
3250                        "Failed to load index '{}' metadata for table '{}': {}",
3251                        index_name, table_uri, e
3252                    ),
3253                })
3254            })?;
3255        if metadatas.first().is_some_and(is_system_index) {
3256            return Err(NamespaceError::Unsupported {
3257                message: format!("System index '{}' is not exposed by this API", index_name),
3258            }
3259            .into());
3260        }
3261
3262        let stats = <Dataset as DatasetIndexExt>::index_statistics(&dataset, index_name)
3263            .await
3264            .map_err(|e| {
3265                lance_core::Error::from(NamespaceError::TableIndexNotFound {
3266                    message: format!(
3267                        "Failed to describe index statistics for '{}' on table '{}': {}",
3268                        index_name, table_uri, e
3269                    ),
3270                })
3271            })?;
3272        let stats: serde_json::Value = serde_json::from_str(&stats).map_err(|e| {
3273            lance_core::Error::from(NamespaceError::Internal {
3274                message: format!(
3275                    "Failed to parse index statistics for '{}' on table '{}': {}",
3276                    index_name, table_uri, e
3277                ),
3278            })
3279        })?;
3280
3281        Ok(Self::describe_table_index_stats_response(&stats))
3282    }
3283
3284    async fn describe_transaction(
3285        &self,
3286        request: DescribeTransactionRequest,
3287    ) -> Result<DescribeTransactionResponse> {
3288        self.record_op("describe_transaction");
3289        let mut request_id = request.id.ok_or_else(|| {
3290            lance_core::Error::from(NamespaceError::InvalidInput {
3291                message: "Transaction id must include table id and transaction identifier"
3292                    .to_string(),
3293            })
3294        })?;
3295        if request_id.len() < 2 {
3296            return Err(NamespaceError::InvalidInput {
3297                message: format!(
3298                    "Transaction request id must include table id and transaction identifier, got {:?}",
3299                    request_id
3300                ),
3301            }
3302            .into());
3303        }
3304
3305        let id = request_id.pop().expect("request_id len checked above");
3306        let table_id = Some(request_id);
3307        let table_uri = self.resolve_table_location(&table_id).await?;
3308        let dataset = self
3309            .load_dataset(&table_uri, None, "describe_transaction")
3310            .await?;
3311        let (version, transaction) = self.find_transaction(&dataset, &id).await?;
3312
3313        Ok(Self::transaction_response(version, &transaction))
3314    }
3315
3316    async fn create_table_scalar_index(
3317        &self,
3318        request: CreateTableIndexRequest,
3319    ) -> Result<CreateTableScalarIndexResponse> {
3320        self.record_op("create_table_scalar_index");
3321        let index_type = Self::parse_index_type(&request.index_type)?;
3322        if !index_type.is_scalar() {
3323            return Err(NamespaceError::InvalidInput {
3324                message: format!(
3325                    "create_table_scalar_index only supports scalar index types, got {}",
3326                    request.index_type
3327                ),
3328            }
3329            .into());
3330        }
3331
3332        let response = self.create_table_index(request).await?;
3333        Ok(CreateTableScalarIndexResponse {
3334            transaction_id: response.transaction_id,
3335        })
3336    }
3337
3338    async fn drop_table_index(
3339        &self,
3340        request: DropTableIndexRequest,
3341    ) -> Result<DropTableIndexResponse> {
3342        self.record_op("drop_table_index");
3343        let table_uri = self.resolve_table_location(&request.id).await?;
3344        let index_name = request.index_name.as_deref().ok_or_else(|| {
3345            lance_core::Error::from(NamespaceError::InvalidInput {
3346                message: "Index name is required for drop_table_index".to_string(),
3347            })
3348        })?;
3349        let mut dataset = self
3350            .load_dataset(&table_uri, None, "drop_table_index")
3351            .await?;
3352        let metadatas = dataset
3353            .load_indices_by_name(index_name)
3354            .await
3355            .map_err(|e| {
3356                lance_core::Error::from(NamespaceError::TableIndexNotFound {
3357                    message: format!(
3358                        "Failed to load index '{}' before dropping it from table '{}': {}",
3359                        index_name, table_uri, e
3360                    ),
3361                })
3362            })?;
3363        if metadatas.first().is_some_and(is_system_index) {
3364            return Err(NamespaceError::Unsupported {
3365                message: format!(
3366                    "System index '{}' cannot be dropped via this API",
3367                    index_name
3368                ),
3369            }
3370            .into());
3371        }
3372
3373        dataset.drop_index(index_name).await.map_err(|e| {
3374            lance_core::Error::from(NamespaceError::TableIndexNotFound {
3375                message: format!(
3376                    "Failed to drop index '{}' from table '{}': {}",
3377                    index_name, table_uri, e
3378                ),
3379            })
3380        })?;
3381
3382        let transaction_id = dataset
3383            .read_transaction()
3384            .await
3385            .map_err(|e| {
3386                lance_core::Error::from(NamespaceError::Internal {
3387                    message: format!(
3388                        "Failed to read committed transaction after dropping index '{}' from '{}': {}",
3389                        index_name, table_uri, e
3390                    ),
3391                })
3392            })?
3393            .map(|transaction| transaction.uuid);
3394
3395        Ok(DropTableIndexResponse { transaction_id })
3396    }
3397
3398    async fn list_all_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
3399        // In dir-only mode there are no child namespaces, so all tables live in the
3400        // root directory. This is equivalent to listing the root namespace.
3401        let mut tables = self.list_directory_tables().await?;
3402        tables = self
3403            .filter_declared_tables(tables, request.include_declared.unwrap_or(true))
3404            .await?;
3405        Self::apply_pagination(&mut tables, request.page_token, request.limit);
3406        Ok(ListTablesResponse::new(tables))
3407    }
3408
3409    async fn restore_table(&self, request: RestoreTableRequest) -> Result<RestoreTableResponse> {
3410        let version = request.version;
3411        if version < 0 {
3412            return Err(Error::invalid_input_source(
3413                format!(
3414                    "Table version for restore_table must be non-negative, got {}",
3415                    version
3416                )
3417                .into(),
3418            ));
3419        }
3420
3421        let table_uri = self.resolve_table_location(&request.id).await?;
3422        let mut dataset = self.load_dataset(&table_uri, None, "restore_table").await?;
3423
3424        dataset = dataset
3425            .checkout_version(version as u64)
3426            .await
3427            .map_err(|e| {
3428                Error::namespace_source(
3429                    format!(
3430                        "Failed to checkout version {} for restore at '{}': {}",
3431                        version, table_uri, e
3432                    )
3433                    .into(),
3434                )
3435            })?;
3436
3437        dataset.restore().await.map_err(|e| {
3438            Error::namespace_source(
3439                format!(
3440                    "Failed to restore table at '{}' to version {}: {}",
3441                    table_uri, version, e
3442                )
3443                .into(),
3444            )
3445        })?;
3446
3447        let transaction_id = dataset
3448            .read_transaction()
3449            .await
3450            .map_err(|e| {
3451                Error::namespace_source(
3452                    format!(
3453                        "Failed to read transaction after restoring '{}': {}",
3454                        table_uri, e
3455                    )
3456                    .into(),
3457                )
3458            })?
3459            .map(|t| t.uuid);
3460
3461        Ok(RestoreTableResponse { transaction_id })
3462    }
3463
3464    async fn update_table_schema_metadata(
3465        &self,
3466        request: UpdateTableSchemaMetadataRequest,
3467    ) -> Result<UpdateTableSchemaMetadataResponse> {
3468        let table_uri = self.resolve_table_location(&request.id).await?;
3469        let mut dataset = self
3470            .load_dataset(&table_uri, None, "update_table_schema_metadata")
3471            .await?;
3472
3473        let new_metadata = request.metadata.unwrap_or_default();
3474        let updated_metadata = dataset
3475            .update_schema_metadata(new_metadata.iter().map(|(k, v)| (k.as_str(), v.as_str())))
3476            .await
3477            .map_err(|e| {
3478                Error::namespace_source(
3479                    format!(
3480                        "Failed to update schema metadata for table at '{}': {}",
3481                        table_uri, e
3482                    )
3483                    .into(),
3484                )
3485            })?;
3486
3487        let transaction_id = dataset
3488            .read_transaction()
3489            .await
3490            .map_err(|e| {
3491                Error::namespace_source(
3492                    format!(
3493                        "Failed to read transaction after updating metadata for '{}': {}",
3494                        table_uri, e
3495                    )
3496                    .into(),
3497                )
3498            })?
3499            .map(|t| t.uuid);
3500
3501        Ok(UpdateTableSchemaMetadataResponse {
3502            metadata: Some(updated_metadata),
3503            transaction_id,
3504        })
3505    }
3506
3507    async fn get_table_stats(
3508        &self,
3509        request: GetTableStatsRequest,
3510    ) -> Result<GetTableStatsResponse> {
3511        let table_uri = self.resolve_table_location(&request.id).await?;
3512        let dataset = Arc::new(
3513            self.load_dataset(&table_uri, None, "get_table_stats")
3514                .await?,
3515        );
3516
3517        // Compute total bytes on disk using field-level statistics
3518        let data_stats = dataset.calculate_data_stats().await.map_err(|e| {
3519            Error::namespace_source(
3520                format!(
3521                    "Failed to calculate data statistics for table at '{}': {}",
3522                    table_uri, e
3523                )
3524                .into(),
3525            )
3526        })?;
3527        let total_bytes: i64 = data_stats
3528            .fields
3529            .iter()
3530            .map(|f| f.bytes_on_disk as i64)
3531            .sum();
3532
3533        // Collect per-fragment row counts
3534        let fragment_row_futures: Vec<_> = dataset
3535            .get_fragments()
3536            .into_iter()
3537            .map(|f| async move { f.physical_rows().await })
3538            .collect();
3539        let fragment_row_results = futures::future::join_all(fragment_row_futures).await;
3540        let mut fragment_row_counts: Vec<i64> = fragment_row_results
3541            .into_iter()
3542            .filter_map(|r| r.ok())
3543            .map(|r| r as i64)
3544            .collect();
3545
3546        let num_fragments = fragment_row_counts.len() as i64;
3547        let num_rows: i64 = fragment_row_counts.iter().sum();
3548
3549        // Fragments with fewer rows than the compaction target are considered "small",
3550        // consistent with CompactionOptions::target_rows_per_fragment default.
3551        const SMALL_FRAGMENT_THRESHOLD: i64 = 1024 * 1024;
3552        let num_small_fragments = fragment_row_counts
3553            .iter()
3554            .filter(|&&r| r < SMALL_FRAGMENT_THRESHOLD)
3555            .count() as i64;
3556
3557        // Compute length summary statistics
3558        fragment_row_counts.sort_unstable();
3559        let lengths = if fragment_row_counts.is_empty() {
3560            FragmentSummary::new(0, 0, 0, 0, 0, 0, 0)
3561        } else {
3562            let len = fragment_row_counts.len();
3563            let min = fragment_row_counts[0];
3564            let max = fragment_row_counts[len - 1];
3565            let mean = num_rows / num_fragments;
3566            let pct = |p: f64| fragment_row_counts[((len - 1) as f64 * p) as usize];
3567            FragmentSummary::new(min, max, mean, pct(0.25), pct(0.50), pct(0.75), pct(0.99))
3568        };
3569
3570        // Count non-system indices
3571        let indices = dataset.load_indices().await.map_err(|e| {
3572            Error::namespace_source(
3573                format!("Failed to load indices for table at '{}': {}", table_uri, e).into(),
3574            )
3575        })?;
3576        let num_indices = indices.iter().filter(|m| !is_system_index(m)).count() as i64;
3577
3578        let fragment_stats = FragmentStats::new(num_fragments, num_small_fragments, lengths);
3579        Ok(GetTableStatsResponse::new(
3580            total_bytes,
3581            num_rows,
3582            num_indices,
3583            fragment_stats,
3584        ))
3585    }
3586
3587    async fn explain_table_query_plan(
3588        &self,
3589        request: ExplainTableQueryPlanRequest,
3590    ) -> Result<String> {
3591        let table_uri = self.resolve_table_location(&request.id).await?;
3592        let dataset = self
3593            .load_dataset(
3594                &table_uri,
3595                request.query.version,
3596                "explain_table_query_plan",
3597            )
3598            .await?;
3599        let verbose = request.verbose.unwrap_or(false);
3600
3601        let mut scanner = dataset.scan();
3602        Self::apply_query_params_to_scanner(
3603            &mut scanner,
3604            request.query.filter.as_deref(),
3605            request.query.columns.as_deref(),
3606            request.query.vector_column.as_deref(),
3607            &request.query.vector,
3608            request.query.k,
3609            request.query.offset,
3610            request.query.prefilter,
3611            request.query.bypass_vector_index,
3612            request.query.nprobes,
3613            request.query.ef,
3614            request.query.refine_factor,
3615            request.query.distance_type.as_deref(),
3616            request.query.fast_search,
3617            request.query.with_row_id,
3618            request.query.lower_bound,
3619            request.query.upper_bound,
3620            "explain_table_query_plan",
3621        )?;
3622
3623        scanner.explain_plan(verbose).await.map_err(|e| {
3624            Error::namespace_source(
3625                format!(
3626                    "Failed to explain query plan for table at '{}': {}",
3627                    table_uri, e
3628                )
3629                .into(),
3630            )
3631        })
3632    }
3633
3634    async fn analyze_table_query_plan(
3635        &self,
3636        request: AnalyzeTableQueryPlanRequest,
3637    ) -> Result<String> {
3638        let table_uri = self.resolve_table_location(&request.id).await?;
3639        let dataset = self
3640            .load_dataset(&table_uri, request.version, "analyze_table_query_plan")
3641            .await?;
3642
3643        let mut scanner = dataset.scan();
3644        Self::apply_query_params_to_scanner(
3645            &mut scanner,
3646            request.filter.as_deref(),
3647            request.columns.as_deref(),
3648            request.vector_column.as_deref(),
3649            &request.vector,
3650            request.k,
3651            request.offset,
3652            request.prefilter,
3653            request.bypass_vector_index,
3654            request.nprobes,
3655            request.ef,
3656            request.refine_factor,
3657            request.distance_type.as_deref(),
3658            request.fast_search,
3659            request.with_row_id,
3660            request.lower_bound,
3661            request.upper_bound,
3662            "analyze_table_query_plan",
3663        )?;
3664
3665        scanner.analyze_plan().await.map_err(|e| {
3666            Error::namespace_source(
3667                format!(
3668                    "Failed to analyze query plan for table at '{}': {}",
3669                    table_uri, e
3670                )
3671                .into(),
3672            )
3673        })
3674    }
3675
3676    async fn count_table_rows(&self, request: CountTableRowsRequest) -> Result<i64> {
3677        self.record_op("count_table_rows");
3678        let table_uri = self.resolve_table_location(&request.id).await?;
3679        let dataset = self
3680            .load_dataset(&table_uri, request.version, "count_table_rows")
3681            .await?;
3682
3683        let count =
3684            dataset
3685                .count_rows(request.predicate)
3686                .await
3687                .map_err(|e| NamespaceError::Internal {
3688                    message: format!("Failed to count rows for table at '{}': {:?}", table_uri, e),
3689                })?;
3690
3691        Ok(count as i64)
3692    }
3693
3694    async fn insert_into_table(
3695        &self,
3696        request: InsertIntoTableRequest,
3697        request_data: Bytes,
3698    ) -> Result<InsertIntoTableResponse> {
3699        self.record_op("insert_into_table");
3700        let table_uri = self.resolve_table_location(&request.id).await?;
3701        let (reader, _num_rows) =
3702            Self::ipc_reader_from_request_data(&request_data, "insert_into_table")?;
3703
3704        let mode = match request.mode.as_deref() {
3705            Some(m) if m.eq_ignore_ascii_case("overwrite") => WriteMode::Overwrite,
3706            Some(m) if m.eq_ignore_ascii_case("append") => WriteMode::Append,
3707            None => WriteMode::Append,
3708            Some(m) => {
3709                return Err(lance_namespace::error::NamespaceError::InvalidInput {
3710                    message: format!(
3711                        "Unsupported write mode '{}'. Supported modes are: 'append', 'overwrite'",
3712                        m
3713                    ),
3714                }
3715                .into());
3716            }
3717        };
3718
3719        if !self.table_uri_has_actual_manifests(&table_uri).await? {
3720            self.write_reader_to_table(&table_uri, reader, WriteMode::Create, None)
3721                .await?;
3722        } else {
3723            self.write_reader_to_table(&table_uri, reader, mode, None)
3724                .await?;
3725        }
3726
3727        Ok(InsertIntoTableResponse {
3728            transaction_id: None,
3729        })
3730    }
3731
3732    async fn merge_insert_into_table(
3733        &self,
3734        request: MergeInsertIntoTableRequest,
3735        request_data: Bytes,
3736    ) -> Result<MergeInsertIntoTableResponse> {
3737        self.record_op("merge_insert_into_table");
3738        let table_uri = self.resolve_table_location(&request.id).await?;
3739        let on = request.on.as_ref().ok_or_else(|| {
3740            lance_core::Error::from(NamespaceError::InvalidInput {
3741                message: "'on' field is required for merge_insert_into_table".to_string(),
3742            })
3743        })?;
3744
3745        let table_has_manifests = self.table_uri_has_actual_manifests(&table_uri).await?;
3746        let (reader, num_rows) =
3747            Self::ipc_reader_from_request_data(&request_data, "merge_insert_into_table")?;
3748
3749        if !table_has_manifests {
3750            let dataset = self
3751                .write_reader_to_table(&table_uri, reader, WriteMode::Create, None)
3752                .await?;
3753            let version = dataset.version().version as i64;
3754            return Ok(MergeInsertIntoTableResponse {
3755                transaction_id: None,
3756                num_updated_rows: Some(0),
3757                num_inserted_rows: Some(num_rows as i64),
3758                num_deleted_rows: Some(0),
3759                version: Some(version),
3760            });
3761        }
3762
3763        let dataset = Arc::new(
3764            self.load_dataset(&table_uri, None, "merge_insert_into_table")
3765                .await?,
3766        );
3767
3768        let mut merge_builder = MergeInsertBuilder::try_new(dataset.clone(), vec![on.clone()])
3769            .map_err(|e| {
3770                lance_core::Error::from(NamespaceError::InvalidInput {
3771                    message: format!("Failed to create merge_insert_into_table builder: {}", e),
3772                })
3773            })?;
3774
3775        if let Some(filter) = request.when_matched_update_all_filt.as_deref() {
3776            let behavior = WhenMatched::update_if(dataset.as_ref(), filter).map_err(|e| {
3777                lance_core::Error::from(NamespaceError::InvalidInput {
3778                    message: format!(
3779                        "Invalid when_matched_update_all_filt for merge_insert_into_table: {}",
3780                        e
3781                    ),
3782                })
3783            })?;
3784            merge_builder.when_matched(behavior);
3785        } else if request.when_matched_update_all.unwrap_or(false) {
3786            merge_builder.when_matched(WhenMatched::UpdateAll);
3787        }
3788
3789        if matches!(request.when_not_matched_insert_all, Some(false)) {
3790            merge_builder.when_not_matched(WhenNotMatched::DoNothing);
3791        } else {
3792            merge_builder.when_not_matched(WhenNotMatched::InsertAll);
3793        }
3794
3795        if let Some(filter) = request.when_not_matched_by_source_delete_filt.as_deref() {
3796            let behavior = WhenNotMatchedBySource::delete_if(dataset.as_ref(), filter).map_err(|e| {
3797                lance_core::Error::from(NamespaceError::InvalidInput {
3798                    message: format!(
3799                        "Invalid when_not_matched_by_source_delete_filt for merge_insert_into_table: {}",
3800                        e
3801                    ),
3802                })
3803            })?;
3804            merge_builder.when_not_matched_by_source(behavior);
3805        } else if request.when_not_matched_by_source_delete.unwrap_or(false) {
3806            merge_builder.when_not_matched_by_source(WhenNotMatchedBySource::Delete);
3807        }
3808
3809        if let Some(use_index) = request.use_index {
3810            merge_builder.use_index(use_index);
3811        }
3812
3813        let (dataset, stats) = merge_builder
3814            .try_build()
3815            .map_err(|e| {
3816                lance_core::Error::from(NamespaceError::InvalidInput {
3817                    message: format!("Failed to build merge_insert_into_table job: {}", e),
3818                })
3819            })?
3820            .execute_reader(reader)
3821            .await
3822            .map_err(|e| NamespaceError::Internal {
3823                message: format!(
3824                    "Failed to merge_insert_into_table at '{}': {}",
3825                    table_uri, e
3826                ),
3827            })?;
3828
3829        Ok(MergeInsertIntoTableResponse {
3830            transaction_id: None,
3831            num_updated_rows: Some(stats.num_updated_rows as i64),
3832            num_inserted_rows: Some(stats.num_inserted_rows as i64),
3833            num_deleted_rows: Some(stats.num_deleted_rows as i64),
3834            version: Some(dataset.version().version as i64),
3835        })
3836    }
3837
3838    async fn query_table(&self, request: QueryTableRequest) -> Result<Bytes> {
3839        use arrow::ipc::writer::FileWriter;
3840
3841        self.record_op("query_table");
3842        let table_uri = self.resolve_table_location(&request.id).await?;
3843        let dataset = self
3844            .load_dataset(&table_uri, request.version, "query_table")
3845            .await?;
3846
3847        // Build scanner
3848        let mut scanner = dataset.scan();
3849
3850        // Check if this is a vector search query
3851        // vector is Box<QueryTableRequestVector>, not Option
3852        let has_vector_query = request
3853            .vector
3854            .single_vector
3855            .as_ref()
3856            .map(|sv| !sv.is_empty())
3857            .unwrap_or(false)
3858            || request
3859                .vector
3860                .multi_vector
3861                .as_ref()
3862                .map(|mv| !mv.is_empty())
3863                .unwrap_or(false);
3864
3865        // Apply prefilter setting (must be set before nearest)
3866        if let Some(prefilter) = request.prefilter {
3867            scanner.prefilter(prefilter);
3868        }
3869
3870        // Apply vector search if query vector is provided
3871        if has_vector_query {
3872            let vector_column = request.vector_column.as_deref().unwrap_or("vector");
3873
3874            // Get the query vector(s)
3875            let query_vector: Vec<f32> = request
3876                .vector
3877                .single_vector
3878                .clone()
3879                .or_else(|| {
3880                    request
3881                        .vector
3882                        .multi_vector
3883                        .as_ref()
3884                        .and_then(|mv| mv.first().cloned())
3885                })
3886                .unwrap_or_default();
3887
3888            if !query_vector.is_empty() {
3889                let k = if request.k > 0 {
3890                    request.k as usize
3891                } else {
3892                    10
3893                };
3894                let query_array = Float32Array::from(query_vector);
3895                scanner
3896                    .nearest(vector_column, &query_array, k)
3897                    .map_err(|e| NamespaceError::InvalidInput {
3898                        message: format!("Invalid vector search: {:?}", e),
3899                    })?;
3900
3901                // Apply distance type if specified
3902                if let Some(ref distance_type) = request.distance_type {
3903                    let metric = match distance_type.to_lowercase().as_str() {
3904                        "l2" | "euclidean" => MetricType::L2,
3905                        "cosine" => MetricType::Cosine,
3906                        "dot" | "inner_product" => MetricType::Dot,
3907                        "hamming" => MetricType::Hamming,
3908                        _ => {
3909                            return Err(NamespaceError::InvalidInput {
3910                                message: format!("Unknown distance type: {}", distance_type),
3911                            }
3912                            .into());
3913                        }
3914                    };
3915                    scanner.distance_metric(metric);
3916                }
3917
3918                // Apply nprobes if specified (maps to minimum_nprobes, matching lancedb behavior)
3919                if let Some(nprobes) = request.nprobes {
3920                    scanner.minimum_nprobes(nprobes as usize);
3921                }
3922
3923                // Apply ef (HNSW search effort) if specified
3924                if let Some(ef) = request.ef {
3925                    scanner.ef(ef as usize);
3926                }
3927
3928                // Apply refine_factor if specified
3929                if let Some(refine_factor) = request.refine_factor {
3930                    scanner.refine(refine_factor as u32);
3931                }
3932
3933                // Apply distance bounds if specified
3934                if request.lower_bound.is_some() || request.upper_bound.is_some() {
3935                    scanner.distance_range(request.lower_bound, request.upper_bound);
3936                }
3937
3938                // Apply use_index (inverse of bypass_vector_index)
3939                if let Some(bypass) = request.bypass_vector_index {
3940                    scanner.use_index(!bypass);
3941                }
3942
3943                // Apply fast_search if specified
3944                if request.fast_search == Some(true) {
3945                    scanner.fast_search();
3946                }
3947            }
3948        }
3949
3950        // Apply full text search if specified
3951        if let Some(ref fts_query) = request.full_text_query {
3952            // Handle string_query (simple string FTS)
3953            if let Some(ref string_query) = fts_query.string_query {
3954                let mut fts = FullTextSearchQuery::new(string_query.query.clone());
3955
3956                // Apply column filter if specified
3957                if let Some(ref columns) = string_query.columns
3958                    && !columns.is_empty()
3959                {
3960                    fts = fts
3961                        .with_columns(columns)
3962                        .map_err(|e| NamespaceError::InvalidInput {
3963                            message: format!("Invalid FTS columns: {:?}", e),
3964                        })?;
3965                }
3966
3967                scanner
3968                    .full_text_search(fts)
3969                    .map_err(|e| NamespaceError::InvalidInput {
3970                        message: format!("Invalid full text search: {:?}", e),
3971                    })?;
3972            }
3973            // Note: structured_query would require more complex parsing
3974            // For now, we only support string_query
3975        }
3976
3977        // Apply column projection if specified
3978        if let Some(ref columns) = request.columns {
3979            if let Some(ref column_names) = columns.column_names
3980                && !column_names.is_empty()
3981            {
3982                scanner
3983                    .project(column_names)
3984                    .map_err(|e| NamespaceError::InvalidInput {
3985                        message: format!("Invalid column projection: {:?}", e),
3986                    })?;
3987            } else if let Some(ref column_aliases) = columns.column_aliases
3988                && !column_aliases.is_empty()
3989            {
3990                // column_aliases is HashMap<String, String> where key is alias, value is SQL expression
3991                let transform_pairs: Vec<(String, String)> = column_aliases
3992                    .iter()
3993                    .map(|(alias, sql)| (alias.clone(), sql.clone()))
3994                    .collect();
3995                scanner
3996                    .project_with_transform(
3997                        &transform_pairs
3998                            .iter()
3999                            .map(|(a, s)| (a.as_str(), s.as_str()))
4000                            .collect::<Vec<_>>(),
4001                    )
4002                    .map_err(|e| NamespaceError::InvalidInput {
4003                        message: format!("Invalid column alias expression: {:?}", e),
4004                    })?;
4005            }
4006        }
4007
4008        // Apply filter if specified
4009        if let Some(ref filter) = request.filter
4010            && !filter.is_empty()
4011        {
4012            scanner
4013                .filter(filter)
4014                .map_err(|e| NamespaceError::InvalidInput {
4015                    message: format!("Invalid filter expression: {:?}", e),
4016                })?;
4017        }
4018
4019        // Apply with_row_id if requested
4020        if request.with_row_id == Some(true) {
4021            scanner.with_row_id();
4022        }
4023
4024        // Apply limit if specified (k is the number of results to return)
4025        // k == 0 means no limit
4026        // Note: For vector search, limit is already applied via nearest()
4027        if !has_vector_query && request.k > 0 {
4028            let offset = request.offset.map(|o| o as i64);
4029            scanner.limit(Some(request.k as i64), offset).map_err(|e| {
4030                NamespaceError::InvalidInput {
4031                    message: format!("Invalid limit/offset: {:?}", e),
4032                }
4033            })?;
4034        } else if has_vector_query && request.offset.is_some() {
4035            // For vector search, offset is handled separately
4036            let offset = request.offset.map(|o| o as i64);
4037            scanner
4038                .limit(None, offset)
4039                .map_err(|e| NamespaceError::InvalidInput {
4040                    message: format!("Invalid offset: {:?}", e),
4041                })?;
4042        }
4043
4044        // Execute the scan and collect results
4045        let batch = scanner
4046            .try_into_batch()
4047            .await
4048            .map_err(|e| NamespaceError::Internal {
4049                message: format!("Failed to execute query: {:?}", e),
4050            })?;
4051
4052        // Serialize to Arrow IPC file format
4053        let schema = batch.schema();
4054        let mut buffer = Vec::new();
4055        {
4056            let mut writer = FileWriter::try_new(&mut buffer, &schema).map_err(|e| {
4057                NamespaceError::Internal {
4058                    message: format!("Failed to create IPC writer: {:?}", e),
4059                }
4060            })?;
4061            writer.write(&batch).map_err(|e| NamespaceError::Internal {
4062                message: format!("Failed to write batch to IPC: {:?}", e),
4063            })?;
4064            writer.finish().map_err(|e| NamespaceError::Internal {
4065                message: format!("Failed to finish IPC writer: {:?}", e),
4066            })?;
4067        }
4068
4069        Ok(Bytes::from(buffer))
4070    }
4071
4072    fn namespace_id(&self) -> String {
4073        format!("DirectoryNamespace {{ root: {:?} }}", self.root)
4074    }
4075}
4076
4077#[cfg(test)]
4078mod tests {
4079    use super::*;
4080    use arrow_ipc::reader::{FileReader, StreamReader};
4081    use lance::dataset::Dataset;
4082    use lance::index::DatasetIndexExt;
4083    use lance_core::utils::tempfile::{TempStdDir, TempStrDir};
4084    use lance_core::utils::testing::CountingObjectStore;
4085    use lance_io::object_store::{providers::local::FileStoreProvider, uri_to_url};
4086    use lance_namespace::models::{
4087        CreateTableRequest, JsonArrowDataType, JsonArrowField, JsonArrowSchema, ListTablesRequest,
4088        QueryTableRequestColumns,
4089    };
4090    use lance_namespace::schema::convert_json_arrow_schema;
4091    use std::io::Cursor;
4092    use std::sync::{
4093        Arc,
4094        atomic::{AtomicUsize, Ordering},
4095    };
4096    use url::Url;
4097
4098    fn assert_plan_contains_all(plan: &str, expected_fragments: &[&str], context: &str) {
4099        for expected_fragment in expected_fragments {
4100            assert!(
4101                plan.contains(expected_fragment),
4102                "{}. Missing fragment: '{}'. Plan:\n{}",
4103                context,
4104                expected_fragment,
4105                plan
4106            );
4107        }
4108    }
4109
4110    /// Helper to create a test DirectoryNamespace with a temporary directory
4111    async fn create_test_namespace() -> (DirectoryNamespace, TempStdDir) {
4112        let temp_dir = TempStdDir::default();
4113
4114        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
4115            .build()
4116            .await
4117            .unwrap();
4118        (namespace, temp_dir)
4119    }
4120
    /// Test-only object store provider that carries a shared counter.
    ///
    /// Its `new_store` implementation hands `listing_count` to every
    /// `CountingObjectStore` it creates.
    #[derive(Debug)]
    struct CountingFileStoreProvider {
        /// Counter shared with each wrapped store. Presumably incremented on listing
        /// calls, judging by the name — confirm against `CountingObjectStore`.
        listing_count: Arc<AtomicUsize>,
    }
4125
4126    #[async_trait]
4127    impl lance_io::object_store::ObjectStoreProvider for CountingFileStoreProvider {
4128        async fn new_store(
4129            &self,
4130            base_path: Url,
4131            params: &ObjectStoreParams,
4132        ) -> Result<ObjectStore> {
4133            let provider = FileStoreProvider;
4134            let mut store = provider.new_store(base_path, params).await?;
4135            store.inner = Arc::new(CountingObjectStore::new(
4136                store.inner.clone(),
4137                self.listing_count.clone(),
4138            ));
4139            Ok(store)
4140        }
4141
4142        fn extract_path(&self, url: &Url) -> Result<Path> {
4143            let provider = FileStoreProvider;
4144            provider.extract_path(url)
4145        }
4146
4147        fn calculate_object_store_prefix(
4148            &self,
4149            url: &Url,
4150            storage_options: Option<&HashMap<String, String>>,
4151        ) -> Result<String> {
4152            let provider = FileStoreProvider;
4153            provider.calculate_object_store_prefix(url, storage_options)
4154        }
4155    }
4156
4157    fn file_object_store_uri(path: &str) -> String {
4158        let file_url = uri_to_url(path).unwrap();
4159        let mut url = Url::parse("file-object-store:///").unwrap();
4160        url.set_path(file_url.path());
4161        url.to_string()
4162    }
4163
4164    fn build_listing_counting_session(listing_count: Arc<AtomicUsize>) -> Arc<Session> {
4165        let registry = Arc::new(ObjectStoreRegistry::default());
4166        registry.insert(
4167            "file-object-store",
4168            Arc::new(CountingFileStoreProvider { listing_count }),
4169        );
4170        Arc::new(Session::new(0, 0, registry))
4171    }
4172
4173    /// Helper to create test IPC data from a schema
4174    fn create_test_ipc_data(schema: &JsonArrowSchema) -> Vec<u8> {
4175        use arrow::ipc::writer::StreamWriter;
4176
4177        let arrow_schema = convert_json_arrow_schema(schema).unwrap();
4178        let arrow_schema = Arc::new(arrow_schema);
4179        let batch = arrow::record_batch::RecordBatch::new_empty(arrow_schema.clone());
4180        let mut buffer = Vec::new();
4181        {
4182            let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
4183            writer.write(&batch).unwrap();
4184            writer.finish().unwrap();
4185        }
4186        buffer
4187    }
4188
4189    fn create_ipc_data_from_batches(
4190        schema: Arc<arrow_schema::Schema>,
4191        batches: Vec<arrow::record_batch::RecordBatch>,
4192    ) -> Vec<u8> {
4193        use arrow::ipc::writer::StreamWriter;
4194
4195        let mut buffer = Vec::new();
4196        {
4197            let mut writer = StreamWriter::try_new(&mut buffer, &schema).unwrap();
4198            for batch in &batches {
4199                writer.write(batch).unwrap();
4200            }
4201            writer.finish().unwrap();
4202        }
4203        buffer
4204    }
4205
4206    fn create_non_empty_test_ipc_data() -> Vec<u8> {
4207        use arrow::array::{Int32Array, StringArray};
4208        use arrow::record_batch::RecordBatch;
4209
4210        let schema = Arc::new(convert_json_arrow_schema(&create_test_schema()).unwrap());
4211        let batch = RecordBatch::try_new(
4212            schema.clone(),
4213            vec![
4214                Arc::new(Int32Array::from(vec![1, 2])),
4215                Arc::new(StringArray::from(vec![Some("alice"), Some("bob")])),
4216            ],
4217        )
4218        .unwrap();
4219        create_ipc_data_from_batches(schema, vec![batch])
4220    }
4221
4222    fn create_single_row_test_ipc_data() -> Vec<u8> {
4223        use arrow::array::{Int32Array, StringArray};
4224        use arrow::record_batch::RecordBatch;
4225
4226        let schema = Arc::new(convert_json_arrow_schema(&create_test_schema()).unwrap());
4227        let batch = RecordBatch::try_new(
4228            schema.clone(),
4229            vec![
4230                Arc::new(Int32Array::from(vec![10])),
4231                Arc::new(StringArray::from(vec![Some("carol")])),
4232            ],
4233        )
4234        .unwrap();
4235        create_ipc_data_from_batches(schema, vec![batch])
4236    }
4237
4238    /// Helper to create a simple test schema
4239    fn create_test_schema() -> JsonArrowSchema {
4240        let int_type = JsonArrowDataType::new("int32".to_string());
4241        let string_type = JsonArrowDataType::new("utf8".to_string());
4242
4243        let id_field = JsonArrowField {
4244            name: "id".to_string(),
4245            r#type: Box::new(int_type),
4246            nullable: false,
4247            metadata: None,
4248        };
4249
4250        let name_field = JsonArrowField {
4251            name: "name".to_string(),
4252            r#type: Box::new(string_type),
4253            nullable: true,
4254            metadata: None,
4255        };
4256
4257        JsonArrowSchema {
4258            fields: vec![id_field, name_field],
4259            metadata: None,
4260        }
4261    }
4262
4263    fn create_scalar_table_ipc_data() -> Vec<u8> {
4264        use arrow::array::{Int32Array, StringArray};
4265        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
4266
4267        let schema = Arc::new(ArrowSchema::new(vec![
4268            Field::new("id", DataType::Int32, false),
4269            Field::new("name", DataType::Utf8, true),
4270        ]));
4271        let batch = arrow::record_batch::RecordBatch::try_new(
4272            schema.clone(),
4273            vec![
4274                Arc::new(Int32Array::from(vec![1, 2, 3])),
4275                Arc::new(StringArray::from(vec!["alice", "bob", "cory"])),
4276            ],
4277        )
4278        .unwrap();
4279        create_ipc_data_from_batches(schema, vec![batch])
4280    }
4281
4282    fn create_vector_table_ipc_data() -> Vec<u8> {
4283        use arrow::array::{FixedSizeListArray, Float32Array, Int32Array};
4284        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
4285
4286        let schema = Arc::new(ArrowSchema::new(vec![
4287            Field::new("id", DataType::Int32, false),
4288            Field::new(
4289                "vector",
4290                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 2),
4291                true,
4292            ),
4293        ]));
4294        let vector_field = Arc::new(Field::new("item", DataType::Float32, true));
4295        let vectors = FixedSizeListArray::try_new(
4296            vector_field,
4297            2,
4298            Arc::new(Float32Array::from(vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6])),
4299            None,
4300        )
4301        .unwrap();
4302        let batch = arrow::record_batch::RecordBatch::try_new(
4303            schema.clone(),
4304            vec![Arc::new(Int32Array::from(vec![1, 2, 3])), Arc::new(vectors)],
4305        )
4306        .unwrap();
4307        create_ipc_data_from_batches(schema, vec![batch])
4308    }
4309
4310    async fn create_scalar_table(namespace: &DirectoryNamespace, table_name: &str) {
4311        let mut create_table_request = CreateTableRequest::new();
4312        create_table_request.id = Some(vec![table_name.to_string()]);
4313        namespace
4314            .create_table(
4315                create_table_request,
4316                Bytes::from(create_scalar_table_ipc_data()),
4317            )
4318            .await
4319            .unwrap();
4320    }
4321
4322    async fn create_vector_table(namespace: &DirectoryNamespace, table_name: &str) {
4323        let mut create_table_request = CreateTableRequest::new();
4324        create_table_request.id = Some(vec![table_name.to_string()]);
4325        namespace
4326            .create_table(
4327                create_table_request,
4328                Bytes::from(create_vector_table_ipc_data()),
4329            )
4330            .await
4331            .unwrap();
4332    }
4333
4334    async fn open_dataset(namespace: &DirectoryNamespace, table_name: &str) -> Dataset {
4335        let mut describe_request = DescribeTableRequest::new();
4336        describe_request.id = Some(vec![table_name.to_string()]);
4337        let table_uri = namespace
4338            .describe_table(describe_request)
4339            .await
4340            .unwrap()
4341            .location
4342            .expect("table location should exist");
4343        Dataset::open(&table_uri).await.unwrap()
4344    }
4345
4346    async fn create_scalar_index(
4347        namespace: &DirectoryNamespace,
4348        table_name: &str,
4349        index_name: &str,
4350    ) -> Option<String> {
4351        use lance_namespace::models::CreateTableIndexRequest;
4352
4353        let mut create_index_request =
4354            CreateTableIndexRequest::new("id".to_string(), "BTREE".to_string());
4355        create_index_request.id = Some(vec![table_name.to_string()]);
4356        create_index_request.name = Some(index_name.to_string());
4357        namespace
4358            .create_table_scalar_index(create_index_request)
4359            .await
4360            .unwrap()
4361            .transaction_id
4362    }
4363
4364    #[tokio::test]
4365    async fn test_create_table() {
4366        let (namespace, _temp_dir) = create_test_namespace().await;
4367
4368        // Create test IPC data
4369        let schema = create_test_schema();
4370        let ipc_data = create_test_ipc_data(&schema);
4371
4372        let mut request = CreateTableRequest::new();
4373        request.id = Some(vec!["test_table".to_string()]);
4374
4375        let response = namespace
4376            .create_table(request, bytes::Bytes::from(ipc_data))
4377            .await
4378            .unwrap();
4379
4380        assert!(response.location.is_some());
4381        assert!(response.location.unwrap().ends_with("test_table.lance"));
4382        assert_eq!(response.version, Some(1));
4383    }
4384
4385    #[tokio::test]
4386    async fn test_create_table_without_data() {
4387        let (namespace, _temp_dir) = create_test_namespace().await;
4388
4389        let mut request = CreateTableRequest::new();
4390        request.id = Some(vec!["test_table".to_string()]);
4391
4392        let result = namespace.create_table(request, bytes::Bytes::new()).await;
4393        assert!(result.is_err());
4394        assert!(
4395            result
4396                .unwrap_err()
4397                .to_string()
4398                .contains("Arrow IPC stream) is required")
4399        );
4400    }
4401
4402    #[tokio::test]
4403    async fn test_create_table_with_invalid_id() {
4404        let (namespace, _temp_dir) = create_test_namespace().await;
4405
4406        // Create test IPC data
4407        let schema = create_test_schema();
4408        let ipc_data = create_test_ipc_data(&schema);
4409
4410        // Test with empty ID
4411        let mut request = CreateTableRequest::new();
4412        request.id = Some(vec![]);
4413
4414        let result = namespace
4415            .create_table(request, bytes::Bytes::from(ipc_data.clone()))
4416            .await;
4417        assert!(result.is_err());
4418
4419        // Test with multi-level ID - should now work with manifest enabled
4420        // First create the parent namespace
4421        let mut create_ns_req = CreateNamespaceRequest::new();
4422        create_ns_req.id = Some(vec!["test_namespace".to_string()]);
4423        namespace.create_namespace(create_ns_req).await.unwrap();
4424
4425        // Now create table in the namespace
4426        let mut request = CreateTableRequest::new();
4427        request.id = Some(vec!["test_namespace".to_string(), "table".to_string()]);
4428
4429        let result = namespace
4430            .create_table(request, bytes::Bytes::from(ipc_data))
4431            .await;
4432        // Should succeed with manifest enabled
4433        assert!(
4434            result.is_ok(),
4435            "Multi-level table IDs should work with manifest enabled"
4436        );
4437    }
4438
4439    #[tokio::test]
4440    async fn test_list_tables() {
4441        let (namespace, _temp_dir) = create_test_namespace().await;
4442
4443        // Initially, no tables
4444        let mut request = ListTablesRequest::new();
4445        request.id = Some(vec![]);
4446        let response = namespace.list_tables(request).await.unwrap();
4447        assert_eq!(response.tables.len(), 0);
4448
4449        // Create test IPC data
4450        let schema = create_test_schema();
4451        let ipc_data = create_test_ipc_data(&schema);
4452
4453        // Create a table
4454        let mut create_request = CreateTableRequest::new();
4455        create_request.id = Some(vec!["table1".to_string()]);
4456        namespace
4457            .create_table(create_request, bytes::Bytes::from(ipc_data.clone()))
4458            .await
4459            .unwrap();
4460
4461        // Create another table
4462        let mut create_request = CreateTableRequest::new();
4463        create_request.id = Some(vec!["table2".to_string()]);
4464        namespace
4465            .create_table(create_request, bytes::Bytes::from(ipc_data))
4466            .await
4467            .unwrap();
4468
4469        // List tables should return both
4470        let mut request = ListTablesRequest::new();
4471        request.id = Some(vec![]);
4472        let response = namespace.list_tables(request).await.unwrap();
4473        let tables = response.tables;
4474        assert_eq!(tables.len(), 2);
4475        assert!(tables.contains(&"table1".to_string()));
4476        assert!(tables.contains(&"table2".to_string()));
4477    }
4478
4479    #[tokio::test]
4480    async fn test_list_tables_pagination() {
4481        let (namespace, _temp_dir) = create_test_namespace().await;
4482
4483        let schema = create_test_schema();
4484        let ipc_data = create_test_ipc_data(&schema);
4485
4486        for name in ["alpha", "bravo", "charlie"] {
4487            let mut req = CreateTableRequest::new();
4488            req.id = Some(vec![name.to_string()]);
4489            namespace
4490                .create_table(req, bytes::Bytes::from(ipc_data.clone()))
4491                .await
4492                .unwrap();
4493        }
4494
4495        // First page: limit=2, no page_token
4496        let first_page = namespace
4497            .list_tables(ListTablesRequest {
4498                id: Some(vec![]),
4499                limit: Some(2),
4500                ..Default::default()
4501            })
4502            .await
4503            .unwrap();
4504
4505        assert_eq!(first_page.tables, vec!["alpha", "bravo"]);
4506        assert_eq!(first_page.page_token.as_deref(), Some("bravo"));
4507
4508        // Second page: use page_token from first response
4509        let second_page = namespace
4510            .list_tables(ListTablesRequest {
4511                id: Some(vec![]),
4512                limit: Some(2),
4513                page_token: first_page.page_token.clone(),
4514                ..Default::default()
4515            })
4516            .await
4517            .unwrap();
4518
4519        assert_eq!(second_page.tables, vec!["charlie"]);
4520        assert!(second_page.page_token.is_none());
4521    }
4522
4523    #[tokio::test]
4524    async fn test_list_tables_pagination_limit_zero() {
4525        let (namespace, _temp_dir) = create_test_namespace().await;
4526
4527        let schema = create_test_schema();
4528        let ipc_data = create_test_ipc_data(&schema);
4529
4530        let mut req = CreateTableRequest::new();
4531        req.id = Some(vec!["alpha".to_string()]);
4532        namespace
4533            .create_table(req, bytes::Bytes::from(ipc_data))
4534            .await
4535            .unwrap();
4536
4537        let response = namespace
4538            .list_tables(ListTablesRequest {
4539                id: Some(vec![]),
4540                limit: Some(0),
4541                ..Default::default()
4542            })
4543            .await
4544            .unwrap();
4545
4546        assert!(response.tables.is_empty());
4547        assert!(response.page_token.is_none());
4548    }
4549
4550    #[tokio::test]
4551    async fn test_list_tables_with_namespace_id() {
4552        let (namespace, _temp_dir) = create_test_namespace().await;
4553
4554        // First create a child namespace
4555        let mut create_ns_req = CreateNamespaceRequest::new();
4556        create_ns_req.id = Some(vec!["test_namespace".to_string()]);
4557        namespace.create_namespace(create_ns_req).await.unwrap();
4558
4559        // Now list tables in the child namespace
4560        let mut request = ListTablesRequest::new();
4561        request.id = Some(vec!["test_namespace".to_string()]);
4562
4563        let result = namespace.list_tables(request).await;
4564        // Should succeed (with manifest enabled) and return empty list (no tables yet)
4565        assert!(
4566            result.is_ok(),
4567            "list_tables should work with child namespace when manifest is enabled"
4568        );
4569        let response = result.unwrap();
4570        assert_eq!(
4571            response.tables.len(),
4572            0,
4573            "Namespace should have no tables yet"
4574        );
4575    }
4576
4577    #[tokio::test]
4578    async fn test_create_scalar_index() {
4579        let (namespace, _temp_dir) = create_test_namespace().await;
4580        create_scalar_table(&namespace, "users").await;
4581
4582        let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4583        let dataset = open_dataset(&namespace, "users").await;
4584        let expected_transaction_id = dataset
4585            .read_transaction()
4586            .await
4587            .unwrap()
4588            .map(|transaction| transaction.uuid);
4589        assert_eq!(transaction_id, expected_transaction_id);
4590        let indices = dataset.load_indices().await.unwrap();
4591        assert!(indices.iter().any(|index| index.name == "users_id_idx"));
4592    }
4593
4594    #[tokio::test]
4595    async fn test_create_vector_index() {
4596        use lance_namespace::models::CreateTableIndexRequest;
4597
4598        let (namespace, _temp_dir) = create_test_namespace().await;
4599        create_vector_table(&namespace, "vectors").await;
4600
4601        let mut create_index_request =
4602            CreateTableIndexRequest::new("vector".to_string(), "IVF_FLAT".to_string());
4603        create_index_request.id = Some(vec!["vectors".to_string()]);
4604        create_index_request.name = Some("vector_idx".to_string());
4605        create_index_request.distance_type = Some("l2".to_string());
4606        let transaction_id = namespace
4607            .create_table_index(create_index_request)
4608            .await
4609            .unwrap()
4610            .transaction_id;
4611
4612        let dataset = open_dataset(&namespace, "vectors").await;
4613        let expected_transaction_id = dataset
4614            .read_transaction()
4615            .await
4616            .unwrap()
4617            .map(|transaction| transaction.uuid);
4618        assert_eq!(transaction_id, expected_transaction_id);
4619        let indices = dataset.load_indices().await.unwrap();
4620        assert!(indices.iter().any(|index| index.name == "vector_idx"));
4621    }
4622
4623    #[tokio::test]
4624    async fn test_list_table_indices() {
4625        use lance_namespace::models::ListTableIndicesRequest;
4626
4627        let (namespace, _temp_dir) = create_test_namespace().await;
4628        create_scalar_table(&namespace, "users").await;
4629        create_scalar_index(&namespace, "users", "a_idx").await;
4630        create_scalar_index(&namespace, "users", "b_idx").await;
4631        let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4632
4633        let response = namespace
4634            .list_table_indices(ListTableIndicesRequest {
4635                id: Some(vec!["users".to_string()]),
4636                ..Default::default()
4637            })
4638            .await
4639            .unwrap();
4640
4641        assert_eq!(response.indexes.len(), 3);
4642        assert_eq!(response.indexes[0].index_name, "a_idx");
4643        assert_eq!(response.indexes[1].index_name, "b_idx");
4644        assert_eq!(response.indexes[2].index_name, "users_id_idx");
4645        assert!(response.page_token.is_none());
4646        let users_id_idx = response
4647            .indexes
4648            .iter()
4649            .find(|index| index.index_name == "users_id_idx")
4650            .unwrap();
4651        assert_eq!(users_id_idx.columns, vec!["id"]);
4652        assert_eq!(users_id_idx.status, "SUCCEEDED");
4653
4654        let dataset = open_dataset(&namespace, "users").await;
4655        let expected_transaction_id = dataset
4656            .read_transaction()
4657            .await
4658            .unwrap()
4659            .map(|transaction| transaction.uuid);
4660        assert_eq!(transaction_id, expected_transaction_id);
4661        let indices = dataset.load_indices().await.unwrap();
4662        assert_eq!(
4663            indices
4664                .iter()
4665                .filter(|index| index.name == "users_id_idx")
4666                .count(),
4667            1
4668        );
4669
4670        let first_page = namespace
4671            .list_table_indices(ListTableIndicesRequest {
4672                id: Some(vec!["users".to_string()]),
4673                limit: Some(2),
4674                ..Default::default()
4675            })
4676            .await
4677            .unwrap();
4678
4679        assert_eq!(first_page.indexes.len(), 2);
4680        assert_eq!(first_page.indexes[0].index_name, "a_idx");
4681        assert_eq!(first_page.indexes[1].index_name, "b_idx");
4682        assert_eq!(first_page.page_token.as_deref(), Some("b_idx"));
4683
4684        let second_page = namespace
4685            .list_table_indices(ListTableIndicesRequest {
4686                id: Some(vec!["users".to_string()]),
4687                page_token: first_page.page_token.clone(),
4688                limit: Some(2),
4689                ..Default::default()
4690            })
4691            .await
4692            .unwrap();
4693
4694        assert_eq!(second_page.indexes.len(), 1);
4695        assert_eq!(second_page.indexes[0].index_name, "users_id_idx");
4696        assert!(second_page.page_token.is_none());
4697    }
4698
4699    #[tokio::test]
4700    async fn test_describe_table_index_stats() {
4701        use lance_namespace::models::DescribeTableIndexStatsRequest;
4702
4703        let (namespace, _temp_dir) = create_test_namespace().await;
4704        create_scalar_table(&namespace, "users").await;
4705        let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4706
4707        let response = namespace
4708            .describe_table_index_stats(DescribeTableIndexStatsRequest {
4709                id: Some(vec!["users".to_string()]),
4710                index_name: Some("users_id_idx".to_string()),
4711                ..Default::default()
4712            })
4713            .await
4714            .unwrap();
4715        assert_eq!(response.index_type, Some("BTree".to_string()));
4716        assert_eq!(response.num_indices, Some(1));
4717        assert_eq!(response.num_indexed_rows, Some(3));
4718        assert_eq!(response.num_unindexed_rows, Some(0));
4719
4720        let dataset = open_dataset(&namespace, "users").await;
4721        let expected_transaction_id = dataset
4722            .read_transaction()
4723            .await
4724            .unwrap()
4725            .map(|transaction| transaction.uuid);
4726        assert_eq!(transaction_id, expected_transaction_id);
4727        let stats: serde_json::Value =
4728            serde_json::from_str(&dataset.index_statistics("users_id_idx").await.unwrap()).unwrap();
4729        assert_eq!(stats["index_type"], "BTree");
4730        assert_eq!(stats["num_indices"], 1);
4731        assert_eq!(stats["num_indexed_rows"], 3);
4732        assert_eq!(stats["num_unindexed_rows"], 0);
4733    }
4734
4735    #[tokio::test]
4736    async fn test_describe_transaction() {
4737        use lance_namespace::models::DescribeTransactionRequest;
4738
4739        let (namespace, _temp_dir) = create_test_namespace().await;
4740        create_scalar_table(&namespace, "users").await;
4741        let transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4742        let dataset = open_dataset(&namespace, "users").await;
4743        let latest_transaction = dataset.read_transaction().await.unwrap();
4744        assert_eq!(
4745            transaction_id,
4746            latest_transaction
4747                .as_ref()
4748                .map(|transaction| transaction.uuid.clone())
4749        );
4750
4751        if let Some(transaction_id) = transaction_id {
4752            let response = namespace
4753                .describe_transaction(DescribeTransactionRequest {
4754                    id: Some(vec!["users".to_string(), transaction_id.clone()]),
4755                    ..Default::default()
4756                })
4757                .await
4758                .unwrap();
4759            assert_eq!(response.status, "SUCCEEDED");
4760            assert_eq!(
4761                response
4762                    .properties
4763                    .as_ref()
4764                    .and_then(|props| props.get("operation")),
4765                Some(&"CreateIndex".to_string())
4766            );
4767            assert_eq!(
4768                response
4769                    .properties
4770                    .as_ref()
4771                    .and_then(|props| props.get("uuid")),
4772                Some(&transaction_id)
4773            );
4774        } else {
4775            assert!(latest_transaction.is_none());
4776        }
4777    }
4778
4779    #[tokio::test]
4780    async fn test_drop_table_index() {
4781        use lance_namespace::models::{DropTableIndexRequest, ListTableIndicesRequest};
4782
4783        let (namespace, _temp_dir) = create_test_namespace().await;
4784        create_scalar_table(&namespace, "users").await;
4785        let create_transaction_id = create_scalar_index(&namespace, "users", "users_id_idx").await;
4786
4787        let drop_transaction_id = namespace
4788            .drop_table_index(DropTableIndexRequest {
4789                id: Some(vec!["users".to_string()]),
4790                index_name: Some("users_id_idx".to_string()),
4791                ..Default::default()
4792            })
4793            .await
4794            .unwrap()
4795            .transaction_id;
4796
4797        let dataset = open_dataset(&namespace, "users").await;
4798        let previous_dataset = dataset
4799            .checkout_version(dataset.version().version - 1)
4800            .await
4801            .unwrap();
4802        let previous_transaction_id = previous_dataset
4803            .read_transaction()
4804            .await
4805            .unwrap()
4806            .map(|transaction| transaction.uuid);
4807        assert_eq!(create_transaction_id, previous_transaction_id);
4808        let expected_drop_transaction_id = dataset
4809            .read_transaction()
4810            .await
4811            .unwrap()
4812            .map(|transaction| transaction.uuid);
4813        assert_eq!(drop_transaction_id, expected_drop_transaction_id);
4814        let indices = dataset.load_indices().await.unwrap();
4815        assert!(!indices.iter().any(|index| index.name == "users_id_idx"));
4816
4817        let list_response = namespace
4818            .list_table_indices(ListTableIndicesRequest {
4819                id: Some(vec!["users".to_string()]),
4820                ..Default::default()
4821            })
4822            .await
4823            .unwrap();
4824        assert!(list_response.indexes.is_empty());
4825    }
4826
4827    #[tokio::test]
4828    async fn test_describe_table() {
4829        let (namespace, _temp_dir) = create_test_namespace().await;
4830
4831        // Create a table first
4832        let schema = create_test_schema();
4833        let ipc_data = create_test_ipc_data(&schema);
4834
4835        let mut create_request = CreateTableRequest::new();
4836        create_request.id = Some(vec!["test_table".to_string()]);
4837        namespace
4838            .create_table(create_request, bytes::Bytes::from(ipc_data))
4839            .await
4840            .unwrap();
4841
4842        // Describe the table
4843        let mut request = DescribeTableRequest::new();
4844        request.id = Some(vec!["test_table".to_string()]);
4845        let response = namespace.describe_table(request).await.unwrap();
4846
4847        assert!(response.location.is_some());
4848        assert!(response.location.unwrap().ends_with("test_table.lance"));
4849    }
4850
4851    #[tokio::test]
4852    async fn test_describe_nonexistent_table() {
4853        let (namespace, _temp_dir) = create_test_namespace().await;
4854
4855        let mut request = DescribeTableRequest::new();
4856        request.id = Some(vec!["nonexistent".to_string()]);
4857
4858        let result = namespace.describe_table(request).await;
4859        assert!(result.is_err());
4860        assert!(result.unwrap_err().to_string().contains("Table not found"));
4861    }
4862
4863    #[tokio::test]
4864    async fn test_table_exists() {
4865        let (namespace, _temp_dir) = create_test_namespace().await;
4866
4867        // Create a table
4868        let schema = create_test_schema();
4869        let ipc_data = create_test_ipc_data(&schema);
4870
4871        let mut create_request = CreateTableRequest::new();
4872        create_request.id = Some(vec!["existing_table".to_string()]);
4873        namespace
4874            .create_table(create_request, bytes::Bytes::from(ipc_data))
4875            .await
4876            .unwrap();
4877
4878        // Check existing table
4879        let mut request = TableExistsRequest::new();
4880        request.id = Some(vec!["existing_table".to_string()]);
4881        let result = namespace.table_exists(request).await;
4882        assert!(result.is_ok());
4883
4884        // Check non-existent table
4885        let mut request = TableExistsRequest::new();
4886        request.id = Some(vec!["nonexistent".to_string()]);
4887        let result = namespace.table_exists(request).await;
4888        assert!(result.is_err());
4889        assert!(result.unwrap_err().to_string().contains("Table not found"));
4890    }
4891
4892    #[tokio::test]
4893    async fn test_drop_table() {
4894        let (namespace, _temp_dir) = create_test_namespace().await;
4895
4896        // Create a table
4897        let schema = create_test_schema();
4898        let ipc_data = create_test_ipc_data(&schema);
4899
4900        let mut create_request = CreateTableRequest::new();
4901        create_request.id = Some(vec!["table_to_drop".to_string()]);
4902        namespace
4903            .create_table(create_request, bytes::Bytes::from(ipc_data))
4904            .await
4905            .unwrap();
4906
4907        // Verify it exists
4908        let mut exists_request = TableExistsRequest::new();
4909        exists_request.id = Some(vec!["table_to_drop".to_string()]);
4910        assert!(namespace.table_exists(exists_request.clone()).await.is_ok());
4911
4912        // Drop the table
4913        let mut drop_request = DropTableRequest::new();
4914        drop_request.id = Some(vec!["table_to_drop".to_string()]);
4915        let response = namespace.drop_table(drop_request).await.unwrap();
4916        assert!(response.location.is_some());
4917
4918        // Verify it no longer exists
4919        assert!(namespace.table_exists(exists_request).await.is_err());
4920    }
4921
4922    #[tokio::test]
4923    async fn test_drop_nonexistent_table() {
4924        let (namespace, _temp_dir) = create_test_namespace().await;
4925
4926        let mut request = DropTableRequest::new();
4927        request.id = Some(vec!["nonexistent".to_string()]);
4928
4929        // Should not fail when dropping non-existent table (idempotent)
4930        let result = namespace.drop_table(request).await;
4931        // The operation might succeed or fail depending on implementation
4932        // But it should not panic
4933        let _ = result;
4934    }
4935
4936    #[tokio::test]
4937    async fn test_root_namespace_operations() {
4938        let (namespace, _temp_dir) = create_test_namespace().await;
4939
4940        // Test list_namespaces - should return empty list for root
4941        let mut request = ListNamespacesRequest::new();
4942        request.id = Some(vec![]);
4943        let result = namespace.list_namespaces(request).await;
4944        assert!(result.is_ok());
4945        assert_eq!(result.unwrap().namespaces.len(), 0);
4946
4947        // Test describe_namespace - should succeed for root
4948        let mut request = DescribeNamespaceRequest::new();
4949        request.id = Some(vec![]);
4950        let result = namespace.describe_namespace(request).await;
4951        assert!(result.is_ok());
4952
4953        // Test namespace_exists - root always exists
4954        let mut request = NamespaceExistsRequest::new();
4955        request.id = Some(vec![]);
4956        let result = namespace.namespace_exists(request).await;
4957        assert!(result.is_ok());
4958
4959        // Test create_namespace - root cannot be created
4960        let mut request = CreateNamespaceRequest::new();
4961        request.id = Some(vec![]);
4962        let result = namespace.create_namespace(request).await;
4963        assert!(result.is_err());
4964        assert!(result.unwrap_err().to_string().contains("already exists"));
4965
4966        // Test drop_namespace - root cannot be dropped
4967        let mut request = DropNamespaceRequest::new();
4968        request.id = Some(vec![]);
4969        let result = namespace.drop_namespace(request).await;
4970        assert!(result.is_err());
4971        assert!(
4972            result
4973                .unwrap_err()
4974                .to_string()
4975                .contains("cannot be dropped")
4976        );
4977    }
4978
4979    #[tokio::test]
4980    async fn test_non_root_namespace_operations() {
4981        let (namespace, _temp_dir) = create_test_namespace().await;
4982
4983        // With manifest enabled (default), child namespaces are now supported
4984        // Test create_namespace for non-root - should succeed with manifest
4985        let mut request = CreateNamespaceRequest::new();
4986        request.id = Some(vec!["child".to_string()]);
4987        let result = namespace.create_namespace(request).await;
4988        assert!(
4989            result.is_ok(),
4990            "Child namespace creation should succeed with manifest enabled"
4991        );
4992
4993        // Test namespace_exists for non-root - should exist after creation
4994        let mut request = NamespaceExistsRequest::new();
4995        request.id = Some(vec!["child".to_string()]);
4996        let result = namespace.namespace_exists(request).await;
4997        assert!(
4998            result.is_ok(),
4999            "Child namespace should exist after creation"
5000        );
5001
5002        // Test drop_namespace for non-root - should succeed
5003        let mut request = DropNamespaceRequest::new();
5004        request.id = Some(vec!["child".to_string()]);
5005        let result = namespace.drop_namespace(request).await;
5006        assert!(
5007            result.is_ok(),
5008            "Child namespace drop should succeed with manifest enabled"
5009        );
5010
5011        // Verify namespace no longer exists
5012        let mut request = NamespaceExistsRequest::new();
5013        request.id = Some(vec!["child".to_string()]);
5014        let result = namespace.namespace_exists(request).await;
5015        assert!(
5016            result.is_err(),
5017            "Child namespace should not exist after drop"
5018        );
5019    }
5020
5021    #[tokio::test]
5022    async fn test_config_custom_root() {
5023        let temp_dir = TempStdDir::default();
5024        let custom_path = temp_dir.join("custom");
5025        std::fs::create_dir(&custom_path).unwrap();
5026
5027        let namespace = DirectoryNamespaceBuilder::new(custom_path.to_string_lossy().to_string())
5028            .build()
5029            .await
5030            .unwrap();
5031
5032        // Create test IPC data
5033        let schema = create_test_schema();
5034        let ipc_data = create_test_ipc_data(&schema);
5035
5036        // Create a table and verify location
5037        let mut request = CreateTableRequest::new();
5038        request.id = Some(vec!["test_table".to_string()]);
5039
5040        let response = namespace
5041            .create_table(request, bytes::Bytes::from(ipc_data))
5042            .await
5043            .unwrap();
5044
5045        assert!(response.location.unwrap().contains("custom"));
5046    }
5047
5048    #[tokio::test]
5049    async fn test_config_storage_options() {
5050        let temp_dir = TempStdDir::default();
5051
5052        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5053            .storage_option("option1", "value1")
5054            .storage_option("option2", "value2")
5055            .build()
5056            .await
5057            .unwrap();
5058
5059        // Create test IPC data
5060        let schema = create_test_schema();
5061        let ipc_data = create_test_ipc_data(&schema);
5062
5063        // Create a table and check storage options are included
5064        let mut request = CreateTableRequest::new();
5065        request.id = Some(vec!["test_table".to_string()]);
5066
5067        let response = namespace
5068            .create_table(request, bytes::Bytes::from(ipc_data))
5069            .await
5070            .unwrap();
5071
5072        let storage_options = response.storage_options.unwrap();
5073        assert_eq!(storage_options.get("option1"), Some(&"value1".to_string()));
5074        assert_eq!(storage_options.get("option2"), Some(&"value2".to_string()));
5075    }
5076
5077    /// When no credential vendor is configured, `describe_table` and
5078    /// `declare_table` must strip credential keys from storage options
5079    /// while preserving non-credential config (region, endpoint, etc.).
5080    #[tokio::test]
5081    async fn test_no_storage_options_without_vendor() {
5082        use lance_namespace::models::DeclareTableRequest;
5083
5084        let temp_dir = TempStdDir::default();
5085
5086        // No manifest, no credential vendor, but storage options with credentials
5087        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5088            .manifest_enabled(false)
5089            .storage_option("aws_access_key_id", "AKID")
5090            .storage_option("aws_secret_access_key", "SECRET")
5091            .storage_option("region", "us-east-1")
5092            .build()
5093            .await
5094            .unwrap();
5095
5096        let schema = create_test_schema();
5097        let ipc_data = create_test_ipc_data(&schema);
5098
5099        // create_table
5100        let mut create_req = CreateTableRequest::new();
5101        create_req.id = Some(vec!["t1".to_string()]);
5102        namespace
5103            .create_table(create_req, bytes::Bytes::from(ipc_data))
5104            .await
5105            .unwrap();
5106
5107        // describe_table should not return storage options without a vendor
5108        let mut desc_req = DescribeTableRequest::new();
5109        desc_req.id = Some(vec!["t1".to_string()]);
5110        let resp = namespace.describe_table(desc_req).await.unwrap();
5111        assert!(resp.storage_options.is_none());
5112
5113        // declare_table should not return storage options without a vendor
5114        let mut decl_req = DeclareTableRequest::new();
5115        decl_req.id = Some(vec!["t2".to_string()]);
5116        let resp = namespace.declare_table(decl_req).await.unwrap();
5117        assert!(resp.storage_options.is_none());
5118    }
5119
5120    /// Same test with manifest mode enabled.
5121    #[tokio::test]
5122    async fn test_no_storage_options_without_vendor_manifest() {
5123        let temp_dir = TempStdDir::default();
5124
5125        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5126            .storage_option("aws_access_key_id", "AKID")
5127            .storage_option("aws_secret_access_key", "SECRET")
5128            .storage_option("region", "us-east-1")
5129            .build()
5130            .await
5131            .unwrap();
5132
5133        let schema = create_test_schema();
5134        let ipc_data = create_test_ipc_data(&schema);
5135
5136        let mut create_req = CreateTableRequest::new();
5137        create_req.id = Some(vec!["t1".to_string()]);
5138        namespace
5139            .create_table(create_req, bytes::Bytes::from(ipc_data))
5140            .await
5141            .unwrap();
5142
5143        // describe_table through manifest should not return storage options without a vendor
5144        let mut desc_req = DescribeTableRequest::new();
5145        desc_req.id = Some(vec!["t1".to_string()]);
5146        let resp = namespace.describe_table(desc_req).await.unwrap();
5147        assert!(resp.storage_options.is_none());
5148    }
5149
5150    #[tokio::test]
5151    async fn test_from_properties_manifest_enabled() {
5152        let temp_dir = TempStdDir::default();
5153
5154        let mut properties = HashMap::new();
5155        properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5156        properties.insert("manifest_enabled".to_string(), "true".to_string());
5157        properties.insert("dir_listing_enabled".to_string(), "false".to_string());
5158
5159        let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5160        assert!(builder.manifest_enabled);
5161        assert!(!builder.dir_listing_enabled);
5162
5163        let namespace = builder.build().await.unwrap();
5164
5165        // Create test IPC data
5166        let schema = create_test_schema();
5167        let ipc_data = create_test_ipc_data(&schema);
5168
5169        // Create a table
5170        let mut request = CreateTableRequest::new();
5171        request.id = Some(vec!["test_table".to_string()]);
5172
5173        let response = namespace
5174            .create_table(request, bytes::Bytes::from(ipc_data))
5175            .await
5176            .unwrap();
5177
5178        assert!(response.location.is_some());
5179    }
5180
5181    #[tokio::test]
5182    async fn test_from_properties_dir_listing_enabled() {
5183        let temp_dir = TempStdDir::default();
5184
5185        let mut properties = HashMap::new();
5186        properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5187        properties.insert("manifest_enabled".to_string(), "false".to_string());
5188        properties.insert("dir_listing_enabled".to_string(), "true".to_string());
5189
5190        let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5191        assert!(!builder.manifest_enabled);
5192        assert!(builder.dir_listing_enabled);
5193
5194        let namespace = builder.build().await.unwrap();
5195
5196        // Create test IPC data
5197        let schema = create_test_schema();
5198        let ipc_data = create_test_ipc_data(&schema);
5199
5200        // Create a table
5201        let mut request = CreateTableRequest::new();
5202        request.id = Some(vec!["test_table".to_string()]);
5203
5204        let response = namespace
5205            .create_table(request, bytes::Bytes::from(ipc_data))
5206            .await
5207            .unwrap();
5208
5209        assert!(response.location.is_some());
5210    }
5211
5212    #[tokio::test]
5213    async fn test_from_properties_defaults() {
5214        let temp_dir = TempStdDir::default();
5215
5216        let mut properties = HashMap::new();
5217        properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5218
5219        let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5220        // Both should default to true
5221        assert!(builder.manifest_enabled);
5222        assert!(builder.dir_listing_enabled);
5223    }
5224
5225    #[tokio::test]
5226    async fn test_from_properties_with_storage_options() {
5227        let temp_dir = TempStdDir::default();
5228
5229        let mut properties = HashMap::new();
5230        properties.insert("root".to_string(), temp_dir.to_str().unwrap().to_string());
5231        properties.insert("manifest_enabled".to_string(), "true".to_string());
5232        properties.insert("storage.region".to_string(), "us-west-2".to_string());
5233        properties.insert("storage.bucket".to_string(), "my-bucket".to_string());
5234
5235        let builder = DirectoryNamespaceBuilder::from_properties(properties, None).unwrap();
5236        assert!(builder.manifest_enabled);
5237        assert!(builder.storage_options.is_some());
5238
5239        let storage_options = builder.storage_options.unwrap();
5240        assert_eq!(
5241            storage_options.get("region"),
5242            Some(&"us-west-2".to_string())
5243        );
5244        assert_eq!(
5245            storage_options.get("bucket"),
5246            Some(&"my-bucket".to_string())
5247        );
5248    }
5249
5250    #[tokio::test]
5251    async fn test_various_arrow_types() {
5252        let (namespace, _temp_dir) = create_test_namespace().await;
5253
5254        // Create schema with various types
5255        let fields = vec![
5256            JsonArrowField {
5257                name: "bool_col".to_string(),
5258                r#type: Box::new(JsonArrowDataType::new("bool".to_string())),
5259                nullable: true,
5260                metadata: None,
5261            },
5262            JsonArrowField {
5263                name: "int8_col".to_string(),
5264                r#type: Box::new(JsonArrowDataType::new("int8".to_string())),
5265                nullable: true,
5266                metadata: None,
5267            },
5268            JsonArrowField {
5269                name: "float64_col".to_string(),
5270                r#type: Box::new(JsonArrowDataType::new("float64".to_string())),
5271                nullable: true,
5272                metadata: None,
5273            },
5274            JsonArrowField {
5275                name: "binary_col".to_string(),
5276                r#type: Box::new(JsonArrowDataType::new("binary".to_string())),
5277                nullable: true,
5278                metadata: None,
5279            },
5280        ];
5281
5282        let schema = JsonArrowSchema {
5283            fields,
5284            metadata: None,
5285        };
5286
5287        // Create IPC data
5288        let ipc_data = create_test_ipc_data(&schema);
5289
5290        let mut request = CreateTableRequest::new();
5291        request.id = Some(vec!["complex_table".to_string()]);
5292
5293        let response = namespace
5294            .create_table(request, bytes::Bytes::from(ipc_data))
5295            .await
5296            .unwrap();
5297
5298        assert!(response.location.is_some());
5299    }
5300
5301    #[tokio::test]
5302    async fn test_connect_dir() {
5303        let temp_dir = TempStdDir::default();
5304
5305        let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
5306            .build()
5307            .await
5308            .unwrap();
5309
5310        // Test basic operation through the concrete type
5311        let mut request = ListTablesRequest::new();
5312        request.id = Some(vec![]);
5313        let response = namespace.list_tables(request).await.unwrap();
5314        assert_eq!(response.tables.len(), 0);
5315    }
5316
5317    #[tokio::test]
5318    async fn test_create_table_with_ipc_data() {
5319        use arrow::array::{Int32Array, StringArray};
5320        use arrow::ipc::writer::StreamWriter;
5321
5322        let (namespace, _temp_dir) = create_test_namespace().await;
5323
5324        // Create a schema with some fields
5325        let schema = create_test_schema();
5326
5327        // Create some test data that matches the schema
5328        let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
5329        let arrow_schema = Arc::new(arrow_schema);
5330
5331        // Create a RecordBatch with actual data
5332        let id_array = Int32Array::from(vec![1, 2, 3]);
5333        let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
5334        let batch = arrow::record_batch::RecordBatch::try_new(
5335            arrow_schema.clone(),
5336            vec![Arc::new(id_array), Arc::new(name_array)],
5337        )
5338        .unwrap();
5339
5340        // Write the batch to an IPC stream
5341        let mut buffer = Vec::new();
5342        {
5343            let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
5344            writer.write(&batch).unwrap();
5345            writer.finish().unwrap();
5346        }
5347
5348        // Create table with the IPC data
5349        let mut request = CreateTableRequest::new();
5350        request.id = Some(vec!["test_table_with_data".to_string()]);
5351
5352        let response = namespace
5353            .create_table(request, Bytes::from(buffer))
5354            .await
5355            .unwrap();
5356
5357        assert_eq!(response.version, Some(1));
5358        assert!(
5359            response
5360                .location
5361                .unwrap()
5362                .contains("test_table_with_data.lance")
5363        );
5364
5365        // Verify table exists
5366        let mut exists_request = TableExistsRequest::new();
5367        exists_request.id = Some(vec!["test_table_with_data".to_string()]);
5368        namespace.table_exists(exists_request).await.unwrap();
5369    }
5370
5371    #[tokio::test]
5372    async fn test_child_namespace_create_and_list() {
5373        let (namespace, _temp_dir) = create_test_namespace().await;
5374
5375        // Create multiple child namespaces
5376        for i in 1..=3 {
5377            let mut create_req = CreateNamespaceRequest::new();
5378            create_req.id = Some(vec![format!("ns{}", i)]);
5379            let result = namespace.create_namespace(create_req).await;
5380            assert!(result.is_ok(), "Failed to create child namespace ns{}", i);
5381        }
5382
5383        // List child namespaces
5384        let list_req = ListNamespacesRequest {
5385            id: Some(vec![]),
5386            ..Default::default()
5387        };
5388        let result = namespace.list_namespaces(list_req).await;
5389        assert!(result.is_ok());
5390        let namespaces = result.unwrap().namespaces;
5391        assert_eq!(namespaces.len(), 3);
5392        assert!(namespaces.contains(&"ns1".to_string()));
5393        assert!(namespaces.contains(&"ns2".to_string()));
5394        assert!(namespaces.contains(&"ns3".to_string()));
5395    }
5396
5397    #[tokio::test]
5398    async fn test_nested_namespace_hierarchy() {
5399        let (namespace, _temp_dir) = create_test_namespace().await;
5400
5401        // Create parent namespace
5402        let mut create_req = CreateNamespaceRequest::new();
5403        create_req.id = Some(vec!["parent".to_string()]);
5404        namespace.create_namespace(create_req).await.unwrap();
5405
5406        // Create nested children
5407        let mut create_req = CreateNamespaceRequest::new();
5408        create_req.id = Some(vec!["parent".to_string(), "child1".to_string()]);
5409        namespace.create_namespace(create_req).await.unwrap();
5410
5411        let mut create_req = CreateNamespaceRequest::new();
5412        create_req.id = Some(vec!["parent".to_string(), "child2".to_string()]);
5413        namespace.create_namespace(create_req).await.unwrap();
5414
5415        // List children of parent
5416        let list_req = ListNamespacesRequest {
5417            id: Some(vec!["parent".to_string()]),
5418            ..Default::default()
5419        };
5420        let result = namespace.list_namespaces(list_req).await;
5421        assert!(result.is_ok());
5422        let children = result.unwrap().namespaces;
5423        assert_eq!(children.len(), 2);
5424        assert!(children.contains(&"child1".to_string()));
5425        assert!(children.contains(&"child2".to_string()));
5426
5427        // List root should only show parent
5428        let list_req = ListNamespacesRequest {
5429            id: Some(vec![]),
5430            ..Default::default()
5431        };
5432        let result = namespace.list_namespaces(list_req).await;
5433        assert!(result.is_ok());
5434        let root_namespaces = result.unwrap().namespaces;
5435        assert_eq!(root_namespaces.len(), 1);
5436        assert_eq!(root_namespaces[0], "parent");
5437    }
5438
5439    #[tokio::test]
5440    async fn test_table_in_child_namespace() {
5441        let (namespace, _temp_dir) = create_test_namespace().await;
5442
5443        // Create child namespace
5444        let mut create_ns_req = CreateNamespaceRequest::new();
5445        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5446        namespace.create_namespace(create_ns_req).await.unwrap();
5447
5448        // Create table in child namespace
5449        let schema = create_test_schema();
5450        let ipc_data = create_test_ipc_data(&schema);
5451        let mut create_table_req = CreateTableRequest::new();
5452        create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5453        let result = namespace
5454            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5455            .await;
5456        assert!(result.is_ok(), "Failed to create table in child namespace");
5457
5458        // List tables in child namespace
5459        let list_req = ListTablesRequest {
5460            id: Some(vec!["test_ns".to_string()]),
5461            ..Default::default()
5462        };
5463        let result = namespace.list_tables(list_req).await;
5464        assert!(result.is_ok());
5465        let tables = result.unwrap().tables;
5466        assert_eq!(tables.len(), 1);
5467        assert_eq!(tables[0], "table1");
5468
5469        // Verify table exists
5470        let mut exists_req = TableExistsRequest::new();
5471        exists_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5472        let result = namespace.table_exists(exists_req).await;
5473        assert!(result.is_ok());
5474
5475        // Describe table in child namespace
5476        let mut describe_req = DescribeTableRequest::new();
5477        describe_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5478        let result = namespace.describe_table(describe_req).await;
5479        assert!(result.is_ok());
5480        let response = result.unwrap();
5481        assert!(response.location.is_some());
5482    }
5483
5484    #[tokio::test]
5485    async fn test_multiple_tables_in_child_namespace() {
5486        let (namespace, _temp_dir) = create_test_namespace().await;
5487
5488        // Create child namespace
5489        let mut create_ns_req = CreateNamespaceRequest::new();
5490        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5491        namespace.create_namespace(create_ns_req).await.unwrap();
5492
5493        // Create multiple tables
5494        let schema = create_test_schema();
5495        let ipc_data = create_test_ipc_data(&schema);
5496        for i in 1..=3 {
5497            let mut create_table_req = CreateTableRequest::new();
5498            create_table_req.id = Some(vec!["test_ns".to_string(), format!("table{}", i)]);
5499            namespace
5500                .create_table(create_table_req, bytes::Bytes::from(ipc_data.clone()))
5501                .await
5502                .unwrap();
5503        }
5504
5505        // List tables
5506        let list_req = ListTablesRequest {
5507            id: Some(vec!["test_ns".to_string()]),
5508            ..Default::default()
5509        };
5510        let result = namespace.list_tables(list_req).await;
5511        assert!(result.is_ok());
5512        let tables = result.unwrap().tables;
5513        assert_eq!(tables.len(), 3);
5514        assert!(tables.contains(&"table1".to_string()));
5515        assert!(tables.contains(&"table2".to_string()));
5516        assert!(tables.contains(&"table3".to_string()));
5517    }
5518
5519    #[tokio::test]
5520    async fn test_drop_table_in_child_namespace() {
5521        let (namespace, _temp_dir) = create_test_namespace().await;
5522
5523        // Create child namespace
5524        let mut create_ns_req = CreateNamespaceRequest::new();
5525        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5526        namespace.create_namespace(create_ns_req).await.unwrap();
5527
5528        // Create table
5529        let schema = create_test_schema();
5530        let ipc_data = create_test_ipc_data(&schema);
5531        let mut create_table_req = CreateTableRequest::new();
5532        create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5533        namespace
5534            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5535            .await
5536            .unwrap();
5537
5538        // Drop table
5539        let mut drop_req = DropTableRequest::new();
5540        drop_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5541        let result = namespace.drop_table(drop_req).await;
5542        assert!(result.is_ok(), "Failed to drop table in child namespace");
5543
5544        // Verify table no longer exists
5545        let mut exists_req = TableExistsRequest::new();
5546        exists_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5547        let result = namespace.table_exists(exists_req).await;
5548        assert!(result.is_err());
5549    }
5550
5551    #[tokio::test]
5552    async fn test_deeply_nested_namespace() {
5553        let (namespace, _temp_dir) = create_test_namespace().await;
5554
5555        // Create deeply nested namespace hierarchy
5556        let mut create_req = CreateNamespaceRequest::new();
5557        create_req.id = Some(vec!["level1".to_string()]);
5558        namespace.create_namespace(create_req).await.unwrap();
5559
5560        let mut create_req = CreateNamespaceRequest::new();
5561        create_req.id = Some(vec!["level1".to_string(), "level2".to_string()]);
5562        namespace.create_namespace(create_req).await.unwrap();
5563
5564        let mut create_req = CreateNamespaceRequest::new();
5565        create_req.id = Some(vec![
5566            "level1".to_string(),
5567            "level2".to_string(),
5568            "level3".to_string(),
5569        ]);
5570        namespace.create_namespace(create_req).await.unwrap();
5571
5572        // Create table in deeply nested namespace
5573        let schema = create_test_schema();
5574        let ipc_data = create_test_ipc_data(&schema);
5575        let mut create_table_req = CreateTableRequest::new();
5576        create_table_req.id = Some(vec![
5577            "level1".to_string(),
5578            "level2".to_string(),
5579            "level3".to_string(),
5580            "table1".to_string(),
5581        ]);
5582        let result = namespace
5583            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5584            .await;
5585        assert!(
5586            result.is_ok(),
5587            "Failed to create table in deeply nested namespace"
5588        );
5589
5590        // Verify table exists
5591        let mut exists_req = TableExistsRequest::new();
5592        exists_req.id = Some(vec![
5593            "level1".to_string(),
5594            "level2".to_string(),
5595            "level3".to_string(),
5596            "table1".to_string(),
5597        ]);
5598        let result = namespace.table_exists(exists_req).await;
5599        assert!(result.is_ok());
5600    }
5601
5602    #[tokio::test]
5603    async fn test_namespace_with_properties() {
5604        let (namespace, _temp_dir) = create_test_namespace().await;
5605
5606        // Create namespace with properties
5607        let mut properties = HashMap::new();
5608        properties.insert("owner".to_string(), "test_user".to_string());
5609        properties.insert("description".to_string(), "Test namespace".to_string());
5610
5611        let mut create_req = CreateNamespaceRequest::new();
5612        create_req.id = Some(vec!["test_ns".to_string()]);
5613        create_req.properties = Some(properties.clone());
5614        namespace.create_namespace(create_req).await.unwrap();
5615
5616        // Describe namespace and verify properties
5617        let describe_req = DescribeNamespaceRequest {
5618            id: Some(vec!["test_ns".to_string()]),
5619            ..Default::default()
5620        };
5621        let result = namespace.describe_namespace(describe_req).await;
5622        assert!(result.is_ok());
5623        let response = result.unwrap();
5624        assert!(response.properties.is_some());
5625        let props = response.properties.unwrap();
5626        assert_eq!(props.get("owner"), Some(&"test_user".to_string()));
5627        assert_eq!(
5628            props.get("description"),
5629            Some(&"Test namespace".to_string())
5630        );
5631    }
5632
5633    #[tokio::test]
5634    async fn test_cannot_drop_namespace_with_tables() {
5635        let (namespace, _temp_dir) = create_test_namespace().await;
5636
5637        // Create namespace
5638        let mut create_ns_req = CreateNamespaceRequest::new();
5639        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5640        namespace.create_namespace(create_ns_req).await.unwrap();
5641
5642        // Create table in namespace
5643        let schema = create_test_schema();
5644        let ipc_data = create_test_ipc_data(&schema);
5645        let mut create_table_req = CreateTableRequest::new();
5646        create_table_req.id = Some(vec!["test_ns".to_string(), "table1".to_string()]);
5647        namespace
5648            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5649            .await
5650            .unwrap();
5651
5652        // Try to drop namespace - should fail
5653        let mut drop_req = DropNamespaceRequest::new();
5654        drop_req.id = Some(vec!["test_ns".to_string()]);
5655        let result = namespace.drop_namespace(drop_req).await;
5656        assert!(
5657            result.is_err(),
5658            "Should not be able to drop namespace with tables"
5659        );
5660    }
5661
5662    #[tokio::test]
5663    async fn test_isolation_between_namespaces() {
5664        let (namespace, _temp_dir) = create_test_namespace().await;
5665
5666        // Create two namespaces
5667        let mut create_req = CreateNamespaceRequest::new();
5668        create_req.id = Some(vec!["ns1".to_string()]);
5669        namespace.create_namespace(create_req).await.unwrap();
5670
5671        let mut create_req = CreateNamespaceRequest::new();
5672        create_req.id = Some(vec!["ns2".to_string()]);
5673        namespace.create_namespace(create_req).await.unwrap();
5674
5675        // Create table with same name in both namespaces
5676        let schema = create_test_schema();
5677        let ipc_data = create_test_ipc_data(&schema);
5678
5679        let mut create_table_req = CreateTableRequest::new();
5680        create_table_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5681        namespace
5682            .create_table(create_table_req, bytes::Bytes::from(ipc_data.clone()))
5683            .await
5684            .unwrap();
5685
5686        let mut create_table_req = CreateTableRequest::new();
5687        create_table_req.id = Some(vec!["ns2".to_string(), "table1".to_string()]);
5688        namespace
5689            .create_table(create_table_req, bytes::Bytes::from(ipc_data))
5690            .await
5691            .unwrap();
5692
5693        // List tables in each namespace
5694        let list_req = ListTablesRequest {
5695            id: Some(vec!["ns1".to_string()]),
5696            page_token: None,
5697            limit: None,
5698            ..Default::default()
5699        };
5700        let result = namespace.list_tables(list_req).await.unwrap();
5701        assert_eq!(result.tables.len(), 1);
5702        assert_eq!(result.tables[0], "table1");
5703
5704        let list_req = ListTablesRequest {
5705            id: Some(vec!["ns2".to_string()]),
5706            page_token: None,
5707            limit: None,
5708            ..Default::default()
5709        };
5710        let result = namespace.list_tables(list_req).await.unwrap();
5711        assert_eq!(result.tables.len(), 1);
5712        assert_eq!(result.tables[0], "table1");
5713
5714        // Drop table in ns1 shouldn't affect ns2
5715        let mut drop_req = DropTableRequest::new();
5716        drop_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5717        namespace.drop_table(drop_req).await.unwrap();
5718
5719        // Verify ns1 table is gone but ns2 table still exists
5720        let mut exists_req = TableExistsRequest::new();
5721        exists_req.id = Some(vec!["ns1".to_string(), "table1".to_string()]);
5722        assert!(namespace.table_exists(exists_req).await.is_err());
5723
5724        let mut exists_req = TableExistsRequest::new();
5725        exists_req.id = Some(vec!["ns2".to_string(), "table1".to_string()]);
5726        assert!(namespace.table_exists(exists_req).await.is_ok());
5727    }
5728
5729    #[tokio::test]
5730    async fn test_migrate_directory_tables() {
5731        let temp_dir = TempStdDir::default();
5732        let temp_path = temp_dir.to_str().unwrap();
5733
5734        // Step 1: Create tables in directory-only mode
5735        let dir_only_ns = DirectoryNamespaceBuilder::new(temp_path)
5736            .manifest_enabled(false)
5737            .dir_listing_enabled(true)
5738            .build()
5739            .await
5740            .unwrap();
5741
5742        // Create some tables
5743        let schema = create_test_schema();
5744        let ipc_data = create_test_ipc_data(&schema);
5745
5746        for i in 1..=3 {
5747            let mut create_req = CreateTableRequest::new();
5748            create_req.id = Some(vec![format!("table{}", i)]);
5749            dir_only_ns
5750                .create_table(create_req, bytes::Bytes::from(ipc_data.clone()))
5751                .await
5752                .unwrap();
5753        }
5754
5755        drop(dir_only_ns);
5756
5757        // Step 2: Create namespace with dual mode (manifest + directory listing)
5758        let dual_mode_ns = DirectoryNamespaceBuilder::new(temp_path)
5759            .manifest_enabled(true)
5760            .dir_listing_enabled(true)
5761            .build()
5762            .await
5763            .unwrap();
5764
5765        // Before migration, tables should be visible (via directory listing fallback)
5766        let mut list_req = ListTablesRequest::new();
5767        list_req.id = Some(vec![]);
5768        let tables = dual_mode_ns.list_tables(list_req).await.unwrap().tables;
5769        assert_eq!(tables.len(), 3);
5770
5771        // Run migration
5772        let migrated_count = dual_mode_ns.migrate().await.unwrap();
5773        assert_eq!(migrated_count, 3, "Should migrate all 3 tables");
5774
5775        // Verify tables are now in manifest
5776        let mut list_req = ListTablesRequest::new();
5777        list_req.id = Some(vec![]);
5778        let tables = dual_mode_ns.list_tables(list_req).await.unwrap().tables;
5779        assert_eq!(tables.len(), 3);
5780
5781        // Run migration again - should be idempotent
5782        let migrated_count = dual_mode_ns.migrate().await.unwrap();
5783        assert_eq!(
5784            migrated_count, 0,
5785            "Should not migrate already-migrated tables"
5786        );
5787
5788        drop(dual_mode_ns);
5789
5790        // Step 3: Create namespace with manifest-only mode
5791        let manifest_only_ns = DirectoryNamespaceBuilder::new(temp_path)
5792            .manifest_enabled(true)
5793            .dir_listing_enabled(false)
5794            .build()
5795            .await
5796            .unwrap();
5797
5798        // Tables should still be accessible (now from manifest only)
5799        let mut list_req = ListTablesRequest::new();
5800        list_req.id = Some(vec![]);
5801        let tables = manifest_only_ns.list_tables(list_req).await.unwrap().tables;
5802        assert_eq!(tables.len(), 3);
5803        assert!(tables.contains(&"table1".to_string()));
5804        assert!(tables.contains(&"table2".to_string()));
5805        assert!(tables.contains(&"table3".to_string()));
5806    }
5807
5808    #[tokio::test]
5809    async fn test_migrate_without_manifest() {
5810        let temp_dir = TempStdDir::default();
5811        let temp_path = temp_dir.to_str().unwrap();
5812
5813        // Create namespace without manifest
5814        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5815            .manifest_enabled(false)
5816            .dir_listing_enabled(true)
5817            .build()
5818            .await
5819            .unwrap();
5820
5821        // migrate() should return 0 when manifest is not enabled
5822        let migrated_count = namespace.migrate().await.unwrap();
5823        assert_eq!(migrated_count, 0);
5824    }
5825
5826    #[tokio::test]
5827    async fn test_register_table() {
5828        use lance_namespace::models::{RegisterTableRequest, TableExistsRequest};
5829
5830        let temp_dir = TempStdDir::default();
5831        let temp_path = temp_dir.to_str().unwrap();
5832
5833        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5834            .dir_listing_to_manifest_migration_enabled(true)
5835            .build()
5836            .await
5837            .unwrap();
5838
5839        // Create a physical table first using lance directly
5840        let schema = create_test_schema();
5841        let ipc_data = create_test_ipc_data(&schema);
5842
5843        let table_uri = format!("{}/external_table.lance", temp_path);
5844        let cursor = Cursor::new(ipc_data);
5845        let stream_reader = StreamReader::try_new(cursor, None).unwrap();
5846        let batches: Vec<_> = stream_reader
5847            .collect::<std::result::Result<Vec<_>, _>>()
5848            .unwrap();
5849        let schema = batches[0].schema();
5850        let batch_results: Vec<_> = batches.into_iter().map(Ok).collect();
5851        let reader = RecordBatchIterator::new(batch_results, schema);
5852        Dataset::write(Box::new(reader), &table_uri, None)
5853            .await
5854            .unwrap();
5855
5856        // Register the table
5857        let mut register_req = RegisterTableRequest::new("external_table.lance".to_string());
5858        register_req.id = Some(vec!["registered_table".to_string()]);
5859
5860        let response = namespace.register_table(register_req).await.unwrap();
5861        assert_eq!(response.location, Some("external_table.lance".to_string()));
5862
5863        // Verify table exists in namespace
5864        let mut exists_req = TableExistsRequest::new();
5865        exists_req.id = Some(vec!["registered_table".to_string()]);
5866        assert!(namespace.table_exists(exists_req).await.is_ok());
5867
5868        // Verify we can list the table
5869        let mut list_req = ListTablesRequest::new();
5870        list_req.id = Some(vec![]);
5871        let tables = namespace.list_tables(list_req).await.unwrap();
5872        assert!(tables.tables.contains(&"registered_table".to_string()));
5873    }
5874
5875    #[tokio::test]
5876    async fn test_register_table_duplicate_fails() {
5877        use lance_namespace::models::RegisterTableRequest;
5878
5879        let temp_dir = TempStdDir::default();
5880        let temp_path = temp_dir.to_str().unwrap();
5881
5882        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5883            .build()
5884            .await
5885            .unwrap();
5886
5887        // Register a table
5888        let mut register_req = RegisterTableRequest::new("test_table.lance".to_string());
5889        register_req.id = Some(vec!["test_table".to_string()]);
5890
5891        namespace
5892            .register_table(register_req.clone())
5893            .await
5894            .unwrap();
5895
5896        // Try to register again - should fail
5897        let result = namespace.register_table(register_req).await;
5898        assert!(result.is_err());
5899        assert!(result.unwrap_err().to_string().contains("already exists"));
5900    }
5901
5902    #[tokio::test]
5903    async fn test_deregister_table() {
5904        use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
5905
5906        let temp_dir = TempStdDir::default();
5907        let temp_path = temp_dir.to_str().unwrap();
5908
5909        // Create namespace with manifest-only mode (no directory listing fallback)
5910        // This ensures deregistered tables are truly invisible
5911        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5912            .manifest_enabled(true)
5913            .dir_listing_enabled(false)
5914            .build()
5915            .await
5916            .unwrap();
5917
5918        // Create a table
5919        let schema = create_test_schema();
5920        let ipc_data = create_test_ipc_data(&schema);
5921
5922        let mut create_req = CreateTableRequest::new();
5923        create_req.id = Some(vec!["test_table".to_string()]);
5924        namespace
5925            .create_table(create_req, bytes::Bytes::from(ipc_data))
5926            .await
5927            .unwrap();
5928
5929        // Verify table exists
5930        let mut exists_req = TableExistsRequest::new();
5931        exists_req.id = Some(vec!["test_table".to_string()]);
5932        assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
5933
5934        // Deregister the table
5935        let mut deregister_req = DeregisterTableRequest::new();
5936        deregister_req.id = Some(vec!["test_table".to_string()]);
5937        let response = namespace.deregister_table(deregister_req).await.unwrap();
5938
5939        // Should return location and id
5940        assert!(
5941            response.location.is_some(),
5942            "Deregister should return location"
5943        );
5944        let location = response.location.as_ref().unwrap();
5945        // Location should be a proper file:// URI with the temp path
5946        // Use uri_to_url to normalize the temp path to a URL for comparison
5947        let expected_url = lance_io::object_store::uri_to_url(temp_path)
5948            .expect("Failed to convert temp path to URL");
5949        let expected_prefix = expected_url.to_string();
5950        assert!(
5951            location.starts_with(&expected_prefix),
5952            "Location should start with '{}', got: {}",
5953            expected_prefix,
5954            location
5955        );
5956        assert!(
5957            location.contains("test_table"),
5958            "Location should contain table name: {}",
5959            location
5960        );
5961        assert_eq!(response.id, Some(vec!["test_table".to_string()]));
5962
5963        // Verify table no longer exists in namespace (removed from manifest)
5964        assert!(namespace.table_exists(exists_req).await.is_err());
5965
5966        // Verify physical data still exists at the returned location
5967        let dataset = Dataset::open(location).await;
5968        assert!(
5969            dataset.is_ok(),
5970            "Physical table data should still exist at {}",
5971            location
5972        );
5973    }
5974
5975    #[tokio::test]
5976    async fn test_deregister_table_in_child_namespace() {
5977        use lance_namespace::models::{
5978            CreateNamespaceRequest, DeregisterTableRequest, TableExistsRequest,
5979        };
5980
5981        let temp_dir = TempStdDir::default();
5982        let temp_path = temp_dir.to_str().unwrap();
5983
5984        let namespace = DirectoryNamespaceBuilder::new(temp_path)
5985            .build()
5986            .await
5987            .unwrap();
5988
5989        // Create child namespace
5990        let mut create_ns_req = CreateNamespaceRequest::new();
5991        create_ns_req.id = Some(vec!["test_ns".to_string()]);
5992        namespace.create_namespace(create_ns_req).await.unwrap();
5993
5994        // Create a table in the child namespace
5995        let schema = create_test_schema();
5996        let ipc_data = create_test_ipc_data(&schema);
5997
5998        let mut create_req = CreateTableRequest::new();
5999        create_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6000        namespace
6001            .create_table(create_req, bytes::Bytes::from(ipc_data))
6002            .await
6003            .unwrap();
6004
6005        // Deregister the table
6006        let mut deregister_req = DeregisterTableRequest::new();
6007        deregister_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6008        let response = namespace.deregister_table(deregister_req).await.unwrap();
6009
6010        // Should return location and id in child namespace
6011        assert!(
6012            response.location.is_some(),
6013            "Deregister should return location"
6014        );
6015        let location = response.location.as_ref().unwrap();
6016        // Location should be a proper file:// URI with the temp path
6017        // Use uri_to_url to normalize the temp path to a URL for comparison
6018        let expected_url = lance_io::object_store::uri_to_url(temp_path)
6019            .expect("Failed to convert temp path to URL");
6020        let expected_prefix = expected_url.to_string();
6021        assert!(
6022            location.starts_with(&expected_prefix),
6023            "Location should start with '{}', got: {}",
6024            expected_prefix,
6025            location
6026        );
6027        assert!(
6028            location.contains("test_ns") && location.contains("test_table"),
6029            "Location should contain namespace and table name: {}",
6030            location
6031        );
6032        assert_eq!(
6033            response.id,
6034            Some(vec!["test_ns".to_string(), "test_table".to_string()])
6035        );
6036
6037        // Verify table no longer exists
6038        let mut exists_req = TableExistsRequest::new();
6039        exists_req.id = Some(vec!["test_ns".to_string(), "test_table".to_string()]);
6040        assert!(namespace.table_exists(exists_req).await.is_err());
6041    }
6042
6043    #[tokio::test]
6044    async fn test_register_without_manifest_fails() {
6045        use lance_namespace::models::RegisterTableRequest;
6046
6047        let temp_dir = TempStdDir::default();
6048        let temp_path = temp_dir.to_str().unwrap();
6049
6050        // Create namespace without manifest
6051        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6052            .manifest_enabled(false)
6053            .build()
6054            .await
6055            .unwrap();
6056
6057        // Try to register - should fail (register requires manifest)
6058        let mut register_req = RegisterTableRequest::new("test_table.lance".to_string());
6059        register_req.id = Some(vec!["test_table".to_string()]);
6060        let result = namespace.register_table(register_req).await;
6061        assert!(result.is_err());
6062        assert!(
6063            result
6064                .unwrap_err()
6065                .to_string()
6066                .contains("manifest mode is enabled")
6067        );
6068
6069        // Note: deregister_table now works in V1 mode via .lance-deregistered marker files
6070        // See test_deregister_table_v1_mode for that test case
6071    }
6072
6073    #[tokio::test]
6074    async fn test_register_table_rejects_absolute_uri() {
6075        use lance_namespace::models::RegisterTableRequest;
6076
6077        let temp_dir = TempStdDir::default();
6078        let temp_path = temp_dir.to_str().unwrap();
6079
6080        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6081            .build()
6082            .await
6083            .unwrap();
6084
6085        // Try to register with absolute URI - should fail
6086        let mut register_req = RegisterTableRequest::new("s3://bucket/table.lance".to_string());
6087        register_req.id = Some(vec!["test_table".to_string()]);
6088        let result = namespace.register_table(register_req).await;
6089        assert!(result.is_err());
6090        let err_msg = result.unwrap_err().to_string();
6091        assert!(err_msg.contains("Absolute URIs are not allowed"));
6092    }
6093
6094    #[tokio::test]
6095    async fn test_register_table_rejects_absolute_path() {
6096        use lance_namespace::models::RegisterTableRequest;
6097
6098        let temp_dir = TempStdDir::default();
6099        let temp_path = temp_dir.to_str().unwrap();
6100
6101        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6102            .build()
6103            .await
6104            .unwrap();
6105
6106        // Try to register with absolute path - should fail
6107        let mut register_req = RegisterTableRequest::new("/tmp/table.lance".to_string());
6108        register_req.id = Some(vec!["test_table".to_string()]);
6109        let result = namespace.register_table(register_req).await;
6110        assert!(result.is_err());
6111        let err_msg = result.unwrap_err().to_string();
6112        assert!(err_msg.contains("Absolute paths are not allowed"));
6113    }
6114
6115    #[tokio::test]
6116    async fn test_register_table_rejects_path_traversal() {
6117        use lance_namespace::models::RegisterTableRequest;
6118
6119        let temp_dir = TempStdDir::default();
6120        let temp_path = temp_dir.to_str().unwrap();
6121
6122        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6123            .build()
6124            .await
6125            .unwrap();
6126
6127        // Try to register with path traversal - should fail
6128        let mut register_req = RegisterTableRequest::new("../outside/table.lance".to_string());
6129        register_req.id = Some(vec!["test_table".to_string()]);
6130        let result = namespace.register_table(register_req).await;
6131        assert!(result.is_err());
6132        let err_msg = result.unwrap_err().to_string();
6133        assert!(err_msg.contains("Path traversal is not allowed"));
6134    }
6135
6136    #[tokio::test]
6137    async fn test_namespace_write() {
6138        use arrow::array::Int32Array;
6139        use arrow::datatypes::{DataType, Field as ArrowField, Schema as ArrowSchema};
6140        use arrow::record_batch::{RecordBatch, RecordBatchIterator};
6141        use lance::dataset::{Dataset, WriteMode, WriteParams};
6142        use lance_namespace::LanceNamespace;
6143
6144        let (namespace, _temp_dir) = create_test_namespace().await;
6145        let namespace = Arc::new(namespace) as Arc<dyn LanceNamespace>;
6146
6147        // Use child namespace instead of root
6148        let table_id = vec!["test_ns".to_string(), "test_table".to_string()];
6149        let schema = Arc::new(ArrowSchema::new(vec![
6150            ArrowField::new("a", DataType::Int32, false),
6151            ArrowField::new("b", DataType::Int32, false),
6152        ]));
6153
6154        // Test 1: CREATE mode
6155        let data1 = RecordBatch::try_new(
6156            schema.clone(),
6157            vec![
6158                Arc::new(Int32Array::from(vec![1, 2, 3])),
6159                Arc::new(Int32Array::from(vec![10, 20, 30])),
6160            ],
6161        )
6162        .unwrap();
6163
6164        let reader1 = RecordBatchIterator::new(vec![data1].into_iter().map(Ok), schema.clone());
6165        let dataset =
6166            Dataset::write_into_namespace(reader1, namespace.clone(), table_id.clone(), None)
6167                .await
6168                .unwrap();
6169
6170        assert_eq!(dataset.count_rows(None).await.unwrap(), 3);
6171        assert_eq!(dataset.version().version, 1);
6172
6173        // Test 2: APPEND mode
6174        let data2 = RecordBatch::try_new(
6175            schema.clone(),
6176            vec![
6177                Arc::new(Int32Array::from(vec![4, 5])),
6178                Arc::new(Int32Array::from(vec![40, 50])),
6179            ],
6180        )
6181        .unwrap();
6182
6183        let params_append = WriteParams {
6184            mode: WriteMode::Append,
6185            ..Default::default()
6186        };
6187
6188        let reader2 = RecordBatchIterator::new(vec![data2].into_iter().map(Ok), schema.clone());
6189        let dataset = Dataset::write_into_namespace(
6190            reader2,
6191            namespace.clone(),
6192            table_id.clone(),
6193            Some(params_append),
6194        )
6195        .await
6196        .unwrap();
6197
6198        assert_eq!(dataset.count_rows(None).await.unwrap(), 5);
6199        assert_eq!(dataset.version().version, 2);
6200
6201        // Test 3: OVERWRITE mode
6202        let data3 = RecordBatch::try_new(
6203            schema.clone(),
6204            vec![
6205                Arc::new(Int32Array::from(vec![100, 200])),
6206                Arc::new(Int32Array::from(vec![1000, 2000])),
6207            ],
6208        )
6209        .unwrap();
6210
6211        let params_overwrite = WriteParams {
6212            mode: WriteMode::Overwrite,
6213            ..Default::default()
6214        };
6215
6216        let reader3 = RecordBatchIterator::new(vec![data3].into_iter().map(Ok), schema.clone());
6217        let dataset = Dataset::write_into_namespace(
6218            reader3,
6219            namespace.clone(),
6220            table_id.clone(),
6221            Some(params_overwrite),
6222        )
6223        .await
6224        .unwrap();
6225
6226        assert_eq!(dataset.count_rows(None).await.unwrap(), 2);
6227        assert_eq!(dataset.version().version, 3);
6228
6229        // Verify old data was replaced
6230        let result = dataset.scan().try_into_batch().await.unwrap();
6231        let a_col = result
6232            .column_by_name("a")
6233            .unwrap()
6234            .as_any()
6235            .downcast_ref::<Int32Array>()
6236            .unwrap();
6237        assert_eq!(a_col.values(), &[100, 200]);
6238    }
6239
6240    // ============================================================
6241    // Tests for declare_table
6242    // ============================================================
6243
6244    #[tokio::test]
6245    async fn test_declare_table_v1_mode() {
6246        use lance_namespace::models::{
6247            DeclareTableRequest, DescribeTableRequest, ListTablesRequest, TableExistsRequest,
6248        };
6249
6250        let temp_dir = TempStdDir::default();
6251        let temp_path = temp_dir.to_str().unwrap();
6252
6253        // Create namespace in V1 mode (no manifest)
6254        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6255            .manifest_enabled(false)
6256            .build()
6257            .await
6258            .unwrap();
6259
6260        // Declare a table
6261        let mut declare_req = DeclareTableRequest::new();
6262        declare_req.id = Some(vec!["test_table".to_string()]);
6263        let response = namespace.declare_table(declare_req).await.unwrap();
6264
6265        // Should return location
6266        assert!(response.location.is_some());
6267        let location = response.location.as_ref().unwrap();
6268        assert!(location.ends_with("test_table.lance"));
6269
6270        // Table should exist (via reserved file)
6271        let mut exists_req = TableExistsRequest::new();
6272        exists_req.id = Some(vec!["test_table".to_string()]);
6273        assert!(namespace.table_exists(exists_req).await.is_ok());
6274
6275        // Describe should work but return no version/schema (not written yet)
6276        let mut describe_req = DescribeTableRequest::new();
6277        describe_req.id = Some(vec!["test_table".to_string()]);
6278        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6279        assert!(describe_response.location.is_some());
6280        assert!(describe_response.version.is_none()); // Not written yet
6281        assert!(describe_response.schema.is_none()); // Not written yet
6282        assert_eq!(describe_response.is_only_declared, None);
6283
6284        let mut describe_req = DescribeTableRequest::new();
6285        describe_req.id = Some(vec!["test_table".to_string()]);
6286        describe_req.check_declared = Some(true);
6287        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6288        assert_eq!(describe_response.is_only_declared, Some(true));
6289
6290        let mut list_req = ListTablesRequest::new();
6291        list_req.id = Some(vec![]);
6292        let list_response = namespace.list_tables(list_req.clone()).await.unwrap();
6293        assert_eq!(list_response.tables, vec!["test_table".to_string()]);
6294
6295        list_req.include_declared = Some(false);
6296        let list_response = namespace.list_tables(list_req).await.unwrap();
6297        assert!(list_response.tables.is_empty());
6298    }
6299
6300    #[tokio::test]
6301    async fn test_insert_into_declared_table_promotes_it_from_declared_state() {
6302        use lance_namespace::models::{
6303            DeclareTableRequest, DescribeTableRequest, InsertIntoTableRequest,
6304        };
6305
6306        let temp_dir = TempStdDir::default();
6307        let temp_path = temp_dir.to_str().unwrap();
6308
6309        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6310            .manifest_enabled(false)
6311            .build()
6312            .await
6313            .unwrap();
6314
6315        let mut declare_req = DeclareTableRequest::new();
6316        declare_req.id = Some(vec!["test_table".to_string()]);
6317        namespace.declare_table(declare_req).await.unwrap();
6318
6319        let schema = create_test_schema();
6320        let ipc_data = create_test_ipc_data(&schema);
6321        let mut insert_req = InsertIntoTableRequest::new();
6322        insert_req.id = Some(vec!["test_table".to_string()]);
6323        namespace
6324            .insert_into_table(insert_req, bytes::Bytes::from(ipc_data))
6325            .await
6326            .unwrap();
6327
6328        let mut describe_req = DescribeTableRequest::new();
6329        describe_req.id = Some(vec!["test_table".to_string()]);
6330        describe_req.load_detailed_metadata = Some(true);
6331        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6332
6333        assert_eq!(describe_response.is_only_declared, Some(false));
6334        assert_eq!(describe_response.version, Some(1));
6335        assert!(describe_response.schema.is_some());
6336
6337        let mut list_req = ListTablesRequest::new();
6338        list_req.id = Some(vec![]);
6339        list_req.include_declared = Some(false);
6340        assert_eq!(
6341            namespace.list_tables(list_req).await.unwrap().tables,
6342            vec!["test_table".to_string()]
6343        );
6344    }
6345
6346    #[tokio::test]
6347    async fn test_create_table_after_declare_table_v1_mode_creates_table() {
6348        use lance_namespace::models::{
6349            DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6350        };
6351
6352        let temp_dir = TempStdDir::default();
6353        let temp_path = temp_dir.to_str().unwrap();
6354
6355        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6356            .manifest_enabled(false)
6357            .build()
6358            .await
6359            .unwrap();
6360
6361        let mut declare_req = DeclareTableRequest::new();
6362        declare_req.id = Some(vec!["test_table".to_string()]);
6363        namespace.declare_table(declare_req).await.unwrap();
6364
6365        let mut create_req = CreateTableRequest::new();
6366        create_req.id = Some(vec!["test_table".to_string()]);
6367        let response = namespace
6368            .create_table(
6369                create_req,
6370                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6371            )
6372            .await
6373            .unwrap();
6374
6375        assert_eq!(response.version, Some(1));
6376
6377        let mut describe_req = DescribeTableRequest::new();
6378        describe_req.id = Some(vec!["test_table".to_string()]);
6379        describe_req.load_detailed_metadata = Some(true);
6380        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6381        assert_eq!(describe_response.is_only_declared, Some(false));
6382        assert_eq!(describe_response.version, Some(1));
6383
6384        let mut list_req = ListTablesRequest::new();
6385        list_req.id = Some(vec![]);
6386        list_req.include_declared = Some(false);
6387        assert_eq!(
6388            namespace.list_tables(list_req).await.unwrap().tables,
6389            vec!["test_table".to_string()]
6390        );
6391    }
6392
6393    #[tokio::test]
6394    async fn test_insert_into_declared_table_with_manifest_promotes_it() {
6395        use lance_namespace::models::{
6396            DeclareTableRequest, DescribeTableRequest, InsertIntoTableRequest, ListTablesRequest,
6397        };
6398
6399        let temp_dir = TempStdDir::default();
6400        let temp_path = temp_dir.to_str().unwrap();
6401
6402        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6403            .manifest_enabled(true)
6404            .dir_listing_enabled(false)
6405            .build()
6406            .await
6407            .unwrap();
6408
6409        let mut declare_req = DeclareTableRequest::new();
6410        declare_req.id = Some(vec!["test_table".to_string()]);
6411        namespace.declare_table(declare_req).await.unwrap();
6412
6413        let mut insert_req = InsertIntoTableRequest::new();
6414        insert_req.id = Some(vec!["test_table".to_string()]);
6415        namespace
6416            .insert_into_table(
6417                insert_req,
6418                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6419            )
6420            .await
6421            .unwrap();
6422
6423        let mut describe_req = DescribeTableRequest::new();
6424        describe_req.id = Some(vec!["test_table".to_string()]);
6425        describe_req.load_detailed_metadata = Some(true);
6426        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6427        assert_eq!(describe_response.is_only_declared, Some(false));
6428        assert_eq!(describe_response.version, Some(1));
6429
6430        let mut list_req = ListTablesRequest::new();
6431        list_req.id = Some(vec![]);
6432        list_req.include_declared = Some(false);
6433        assert_eq!(
6434            namespace.list_tables(list_req).await.unwrap().tables,
6435            vec!["test_table".to_string()]
6436        );
6437    }
6438
6439    #[tokio::test]
6440    async fn test_create_table_after_declare_table_with_manifest_creates_table() {
6441        use lance_namespace::models::{
6442            CreateTableRequest, DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6443        };
6444
6445        let temp_dir = TempStdDir::default();
6446        let temp_path = temp_dir.to_str().unwrap();
6447
6448        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6449            .manifest_enabled(true)
6450            .dir_listing_enabled(false)
6451            .build()
6452            .await
6453            .unwrap();
6454
6455        let mut declare_req = DeclareTableRequest::new();
6456        declare_req.id = Some(vec!["test_table".to_string()]);
6457        declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6458        namespace.declare_table(declare_req).await.unwrap();
6459
6460        let mut create_req = CreateTableRequest::new();
6461        create_req.id = Some(vec!["test_table".to_string()]);
6462        create_req.mode = Some("Overwrite".to_string());
6463        let response = namespace
6464            .create_table(
6465                create_req,
6466                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6467            )
6468            .await
6469            .unwrap();
6470
6471        assert_eq!(response.version, Some(1));
6472        assert_eq!(
6473            response
6474                .properties
6475                .as_ref()
6476                .and_then(|properties| properties.get("owner")),
6477            Some(&"alice".to_string())
6478        );
6479
6480        let mut describe_req = DescribeTableRequest::new();
6481        describe_req.id = Some(vec!["test_table".to_string()]);
6482        describe_req.load_detailed_metadata = Some(true);
6483        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6484        assert_eq!(describe_response.is_only_declared, Some(false));
6485        assert_eq!(describe_response.version, Some(1));
6486        assert_eq!(
6487            describe_response
6488                .properties
6489                .as_ref()
6490                .and_then(|properties| properties.get("owner")),
6491            Some(&"alice".to_string())
6492        );
6493
6494        let mut list_req = ListTablesRequest::new();
6495        list_req.id = Some(vec![]);
6496        list_req.include_declared = Some(false);
6497        assert_eq!(
6498            namespace.list_tables(list_req).await.unwrap().tables,
6499            vec!["test_table".to_string()]
6500        );
6501    }
6502
6503    #[tokio::test]
6504    async fn test_create_table_after_declare_table_with_manifest_rejects_new_properties() {
6505        use lance_namespace::models::{CreateTableRequest, DeclareTableRequest};
6506
6507        let temp_dir = TempStdDir::default();
6508        let temp_path = temp_dir.to_str().unwrap();
6509
6510        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6511            .manifest_enabled(true)
6512            .dir_listing_enabled(false)
6513            .build()
6514            .await
6515            .unwrap();
6516
6517        let mut declare_req = DeclareTableRequest::new();
6518        declare_req.id = Some(vec!["test_table".to_string()]);
6519        declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6520        namespace.declare_table(declare_req).await.unwrap();
6521
6522        let mut create_req = CreateTableRequest::new();
6523        create_req.id = Some(vec!["test_table".to_string()]);
6524        create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6525
6526        let result = namespace
6527            .create_table(
6528                create_req,
6529                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6530            )
6531            .await;
6532
6533        assert!(result.is_err());
6534        assert!(
6535            result
6536                .unwrap_err()
6537                .to_string()
6538                .contains("cannot set properties for already declared table")
6539        );
6540    }
6541
6542    #[tokio::test]
6543    async fn test_create_table_with_manifest_exist_ok_keeps_existing_table() {
6544        use lance_namespace::models::{CreateTableRequest, DescribeTableRequest};
6545
6546        let temp_dir = TempStdDir::default();
6547        let temp_path = temp_dir.to_str().unwrap();
6548
6549        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6550            .manifest_enabled(true)
6551            .dir_listing_enabled(false)
6552            .build()
6553            .await
6554            .unwrap();
6555
6556        let mut create_req = CreateTableRequest::new();
6557        create_req.id = Some(vec!["test_table".to_string()]);
6558        create_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6559        namespace
6560            .create_table(
6561                create_req,
6562                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6563            )
6564            .await
6565            .unwrap();
6566
6567        let mut create_req = CreateTableRequest::new();
6568        create_req.id = Some(vec!["test_table".to_string()]);
6569        create_req.mode = Some("ExistOk".to_string());
6570        create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6571        let response = namespace
6572            .create_table(
6573                create_req,
6574                bytes::Bytes::from(create_single_row_test_ipc_data()),
6575            )
6576            .await
6577            .unwrap();
6578
6579        assert_eq!(
6580            response
6581                .properties
6582                .as_ref()
6583                .and_then(|properties| properties.get("owner")),
6584            Some(&"alice".to_string())
6585        );
6586        assert_eq!(
6587            open_dataset(&namespace, "test_table")
6588                .await
6589                .count_rows(None)
6590                .await
6591                .unwrap(),
6592            2
6593        );
6594
6595        let mut describe_req = DescribeTableRequest::new();
6596        describe_req.id = Some(vec!["test_table".to_string()]);
6597        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6598        assert_eq!(
6599            describe_response
6600                .properties
6601                .as_ref()
6602                .and_then(|properties| properties.get("owner")),
6603            Some(&"alice".to_string())
6604        );
6605    }
6606
6607    #[tokio::test]
6608    async fn test_create_table_with_manifest_overwrite_replaces_existing_table() {
6609        use lance_namespace::models::{CreateTableRequest, DescribeTableRequest};
6610
6611        let temp_dir = TempStdDir::default();
6612        let temp_path = temp_dir.to_str().unwrap();
6613
6614        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6615            .manifest_enabled(true)
6616            .dir_listing_enabled(false)
6617            .build()
6618            .await
6619            .unwrap();
6620
6621        let mut create_req = CreateTableRequest::new();
6622        create_req.id = Some(vec!["test_table".to_string()]);
6623        create_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6624        namespace
6625            .create_table(
6626                create_req,
6627                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6628            )
6629            .await
6630            .unwrap();
6631
6632        let mut create_req = CreateTableRequest::new();
6633        create_req.id = Some(vec!["test_table".to_string()]);
6634        create_req.mode = Some("overwrite".to_string());
6635        create_req.properties = Some(HashMap::from([("owner".to_string(), "bob".to_string())]));
6636        let response = namespace
6637            .create_table(
6638                create_req,
6639                bytes::Bytes::from(create_single_row_test_ipc_data()),
6640            )
6641            .await
6642            .unwrap();
6643
6644        assert_eq!(response.version, Some(2));
6645        assert_eq!(
6646            response
6647                .properties
6648                .as_ref()
6649                .and_then(|properties| properties.get("owner")),
6650            Some(&"bob".to_string())
6651        );
6652        assert_eq!(
6653            open_dataset(&namespace, "test_table")
6654                .await
6655                .count_rows(None)
6656                .await
6657                .unwrap(),
6658            1
6659        );
6660
6661        let mut describe_req = DescribeTableRequest::new();
6662        describe_req.id = Some(vec!["test_table".to_string()]);
6663        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6664        assert_eq!(
6665            describe_response
6666                .properties
6667                .as_ref()
6668                .and_then(|properties| properties.get("owner")),
6669            Some(&"bob".to_string())
6670        );
6671    }
6672
6673    #[tokio::test]
6674    async fn test_create_table_with_manifest_invalid_mode_rejected() {
6675        use lance_namespace::models::CreateTableRequest;
6676
6677        let temp_dir = TempStdDir::default();
6678        let temp_path = temp_dir.to_str().unwrap();
6679
6680        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6681            .manifest_enabled(true)
6682            .dir_listing_enabled(false)
6683            .build()
6684            .await
6685            .unwrap();
6686
6687        let mut create_req = CreateTableRequest::new();
6688        create_req.id = Some(vec!["test_table".to_string()]);
6689        create_req.mode = Some("append".to_string());
6690        let result = namespace
6691            .create_table(
6692                create_req,
6693                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6694            )
6695            .await;
6696
6697        assert!(result.is_err());
6698        assert!(
6699            result
6700                .unwrap_err()
6701                .to_string()
6702                .contains("Unsupported create_table mode")
6703        );
6704    }
6705
6706    #[tokio::test]
6707    async fn test_merge_insert_into_declared_table_v1_mode_creates_table() {
6708        use lance_namespace::models::{
6709            DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6710            MergeInsertIntoTableRequest,
6711        };
6712
6713        let temp_dir = TempStdDir::default();
6714        let temp_path = temp_dir.to_str().unwrap();
6715
6716        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6717            .manifest_enabled(false)
6718            .build()
6719            .await
6720            .unwrap();
6721
6722        let mut declare_req = DeclareTableRequest::new();
6723        declare_req.id = Some(vec!["test_table".to_string()]);
6724        namespace.declare_table(declare_req).await.unwrap();
6725
6726        let mut merge_req = MergeInsertIntoTableRequest::new();
6727        merge_req.id = Some(vec!["test_table".to_string()]);
6728        merge_req.on = Some("id".to_string());
6729        let response = namespace
6730            .merge_insert_into_table(
6731                merge_req,
6732                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6733            )
6734            .await
6735            .unwrap();
6736
6737        assert_eq!(response.num_inserted_rows, Some(2));
6738        assert_eq!(response.num_updated_rows, Some(0));
6739
6740        let mut describe_req = DescribeTableRequest::new();
6741        describe_req.id = Some(vec!["test_table".to_string()]);
6742        describe_req.load_detailed_metadata = Some(true);
6743        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6744        assert_eq!(describe_response.is_only_declared, Some(false));
6745        assert_eq!(describe_response.version, Some(1));
6746
6747        let mut list_req = ListTablesRequest::new();
6748        list_req.id = Some(vec![]);
6749        list_req.include_declared = Some(false);
6750        assert_eq!(
6751            namespace.list_tables(list_req).await.unwrap().tables,
6752            vec!["test_table".to_string()]
6753        );
6754    }
6755
6756    #[tokio::test]
6757    async fn test_merge_insert_into_declared_table_with_manifest_creates_table() {
6758        use lance_namespace::models::{
6759            DeclareTableRequest, DescribeTableRequest, ListTablesRequest,
6760            MergeInsertIntoTableRequest,
6761        };
6762
6763        let temp_dir = TempStdDir::default();
6764        let temp_path = temp_dir.to_str().unwrap();
6765
6766        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6767            .manifest_enabled(true)
6768            .dir_listing_enabled(false)
6769            .build()
6770            .await
6771            .unwrap();
6772
6773        let mut declare_req = DeclareTableRequest::new();
6774        declare_req.id = Some(vec!["test_table".to_string()]);
6775        namespace.declare_table(declare_req).await.unwrap();
6776
6777        let mut merge_req = MergeInsertIntoTableRequest::new();
6778        merge_req.id = Some(vec!["test_table".to_string()]);
6779        merge_req.on = Some("id".to_string());
6780        let response = namespace
6781            .merge_insert_into_table(
6782                merge_req,
6783                bytes::Bytes::from(create_non_empty_test_ipc_data()),
6784            )
6785            .await
6786            .unwrap();
6787
6788        assert_eq!(response.num_inserted_rows, Some(2));
6789        assert_eq!(response.num_updated_rows, Some(0));
6790
6791        let mut describe_req = DescribeTableRequest::new();
6792        describe_req.id = Some(vec!["test_table".to_string()]);
6793        describe_req.load_detailed_metadata = Some(true);
6794        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6795        assert_eq!(describe_response.is_only_declared, Some(false));
6796        assert_eq!(describe_response.version, Some(1));
6797
6798        let mut list_req = ListTablesRequest::new();
6799        list_req.id = Some(vec![]);
6800        list_req.include_declared = Some(false);
6801        assert_eq!(
6802            namespace.list_tables(list_req).await.unwrap().tables,
6803            vec!["test_table".to_string()]
6804        );
6805    }
6806
6807    #[tokio::test]
6808    async fn test_declare_table_with_manifest() {
6809        use lance_namespace::models::{
6810            DeclareTableRequest, DescribeTableRequest, ListTablesRequest, TableExistsRequest,
6811        };
6812
6813        let temp_dir = TempStdDir::default();
6814        let temp_path = temp_dir.to_str().unwrap();
6815
6816        // Create namespace with manifest
6817        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6818            .manifest_enabled(true)
6819            .dir_listing_enabled(false)
6820            .build()
6821            .await
6822            .unwrap();
6823
6824        // Declare a table
6825        let mut declare_req = DeclareTableRequest::new();
6826        declare_req.id = Some(vec!["test_table".to_string()]);
6827        declare_req.properties = Some(HashMap::from([("owner".to_string(), "alice".to_string())]));
6828        let response = namespace.declare_table(declare_req).await.unwrap();
6829
6830        // Should return location
6831        assert!(response.location.is_some());
6832        assert_eq!(
6833            response
6834                .properties
6835                .as_ref()
6836                .and_then(|properties| properties.get("owner")),
6837            Some(&"alice".to_string())
6838        );
6839
6840        // Table should exist in manifest
6841        let mut exists_req = TableExistsRequest::new();
6842        exists_req.id = Some(vec!["test_table".to_string()]);
6843        assert!(namespace.table_exists(exists_req).await.is_ok());
6844
6845        let mut describe_req = DescribeTableRequest::new();
6846        describe_req.id = Some(vec!["test_table".to_string()]);
6847        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6848        assert_eq!(describe_response.is_only_declared, None);
6849
6850        let mut describe_req = DescribeTableRequest::new();
6851        describe_req.id = Some(vec!["test_table".to_string()]);
6852        describe_req.check_declared = Some(true);
6853        let describe_response = namespace.describe_table(describe_req).await.unwrap();
6854        assert_eq!(describe_response.is_only_declared, Some(true));
6855        assert_eq!(
6856            describe_response
6857                .properties
6858                .as_ref()
6859                .and_then(|properties| properties.get("owner")),
6860            Some(&"alice".to_string())
6861        );
6862
6863        let mut list_req = ListTablesRequest::new();
6864        list_req.id = Some(vec![]);
6865        assert_eq!(
6866            namespace
6867                .list_tables(list_req.clone())
6868                .await
6869                .unwrap()
6870                .tables,
6871            vec!["test_table".to_string()]
6872        );
6873        list_req.include_declared = Some(false);
6874        assert!(
6875            namespace
6876                .list_tables(list_req)
6877                .await
6878                .unwrap()
6879                .tables
6880                .is_empty()
6881        );
6882    }
6883
6884    #[tokio::test]
6885    async fn test_declare_table_when_table_exists() {
6886        use lance_namespace::models::DeclareTableRequest;
6887
6888        let temp_dir = TempStdDir::default();
6889        let temp_path = temp_dir.to_str().unwrap();
6890
6891        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6892            .manifest_enabled(false)
6893            .build()
6894            .await
6895            .unwrap();
6896
6897        // First create a table with actual data
6898        let schema = create_test_schema();
6899        let ipc_data = create_test_ipc_data(&schema);
6900        let mut create_req = CreateTableRequest::new();
6901        create_req.id = Some(vec!["test_table".to_string()]);
6902        namespace
6903            .create_table(create_req, bytes::Bytes::from(ipc_data))
6904            .await
6905            .unwrap();
6906
6907        // Try to declare the same table - should fail because it already has data
6908        let mut declare_req = DeclareTableRequest::new();
6909        declare_req.id = Some(vec!["test_table".to_string()]);
6910        let result = namespace.declare_table(declare_req).await;
6911        assert!(result.is_err());
6912    }
6913
6914    // ============================================================
6915    // Tests for deregister_table in V1 mode
6916    // ============================================================
6917
6918    #[tokio::test]
6919    async fn test_deregister_table_v1_mode() {
6920        use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
6921
6922        let temp_dir = TempStdDir::default();
6923        let temp_path = temp_dir.to_str().unwrap();
6924
6925        // Create namespace in V1 mode (no manifest, with dir listing)
6926        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6927            .manifest_enabled(false)
6928            .dir_listing_enabled(true)
6929            .build()
6930            .await
6931            .unwrap();
6932
6933        // Create a table with data
6934        let schema = create_test_schema();
6935        let ipc_data = create_test_ipc_data(&schema);
6936        let mut create_req = CreateTableRequest::new();
6937        create_req.id = Some(vec!["test_table".to_string()]);
6938        namespace
6939            .create_table(create_req, bytes::Bytes::from(ipc_data))
6940            .await
6941            .unwrap();
6942
6943        // Verify table exists
6944        let mut exists_req = TableExistsRequest::new();
6945        exists_req.id = Some(vec!["test_table".to_string()]);
6946        assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
6947
6948        // Deregister the table
6949        let mut deregister_req = DeregisterTableRequest::new();
6950        deregister_req.id = Some(vec!["test_table".to_string()]);
6951        let response = namespace.deregister_table(deregister_req).await.unwrap();
6952
6953        // Should return location
6954        assert!(response.location.is_some());
6955        let location = response.location.as_ref().unwrap();
6956        assert!(location.contains("test_table"));
6957
6958        // Table should no longer exist (deregistered)
6959        let result = namespace.table_exists(exists_req).await;
6960        assert!(result.is_err());
6961        assert!(result.unwrap_err().to_string().contains("deregistered"));
6962
6963        // Physical data should still exist
6964        let dataset = Dataset::open(location).await;
6965        assert!(dataset.is_ok(), "Physical table data should still exist");
6966    }
6967
6968    #[tokio::test]
6969    async fn test_deregister_table_v1_already_deregistered() {
6970        use lance_namespace::models::DeregisterTableRequest;
6971
6972        let temp_dir = TempStdDir::default();
6973        let temp_path = temp_dir.to_str().unwrap();
6974
6975        let namespace = DirectoryNamespaceBuilder::new(temp_path)
6976            .manifest_enabled(false)
6977            .dir_listing_enabled(true)
6978            .build()
6979            .await
6980            .unwrap();
6981
6982        // Create a table
6983        let schema = create_test_schema();
6984        let ipc_data = create_test_ipc_data(&schema);
6985        let mut create_req = CreateTableRequest::new();
6986        create_req.id = Some(vec!["test_table".to_string()]);
6987        namespace
6988            .create_table(create_req, bytes::Bytes::from(ipc_data))
6989            .await
6990            .unwrap();
6991
6992        // Deregister once
6993        let mut deregister_req = DeregisterTableRequest::new();
6994        deregister_req.id = Some(vec!["test_table".to_string()]);
6995        namespace
6996            .deregister_table(deregister_req.clone())
6997            .await
6998            .unwrap();
6999
7000        // Try to deregister again - should fail
7001        let result = namespace.deregister_table(deregister_req).await;
7002        assert!(result.is_err());
7003        assert!(
7004            result
7005                .unwrap_err()
7006                .to_string()
7007                .contains("already deregistered")
7008        );
7009    }
7010
7011    // ============================================================
7012    // Tests for list_tables skipping deregistered tables
7013    // ============================================================
7014
7015    #[tokio::test]
7016    async fn test_list_tables_skips_deregistered_v1() {
7017        use lance_namespace::models::DeregisterTableRequest;
7018
7019        let temp_dir = TempStdDir::default();
7020        let temp_path = temp_dir.to_str().unwrap();
7021
7022        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7023            .manifest_enabled(false)
7024            .dir_listing_enabled(true)
7025            .build()
7026            .await
7027            .unwrap();
7028
7029        // Create two tables
7030        let schema = create_test_schema();
7031        let ipc_data = create_test_ipc_data(&schema);
7032
7033        let mut create_req1 = CreateTableRequest::new();
7034        create_req1.id = Some(vec!["table1".to_string()]);
7035        namespace
7036            .create_table(create_req1, bytes::Bytes::from(ipc_data.clone()))
7037            .await
7038            .unwrap();
7039
7040        let mut create_req2 = CreateTableRequest::new();
7041        create_req2.id = Some(vec!["table2".to_string()]);
7042        namespace
7043            .create_table(create_req2, bytes::Bytes::from(ipc_data))
7044            .await
7045            .unwrap();
7046
7047        // List tables - should see both (root namespace = empty vec)
7048        let mut list_req = ListTablesRequest::new();
7049        list_req.id = Some(vec![]);
7050        let list_response = namespace.list_tables(list_req.clone()).await.unwrap();
7051        assert_eq!(list_response.tables.len(), 2);
7052
7053        // Deregister table1
7054        let mut deregister_req = DeregisterTableRequest::new();
7055        deregister_req.id = Some(vec!["table1".to_string()]);
7056        namespace.deregister_table(deregister_req).await.unwrap();
7057
7058        // List tables - should only see table2
7059        let list_response = namespace.list_tables(list_req).await.unwrap();
7060        assert_eq!(list_response.tables.len(), 1);
7061        assert!(list_response.tables.contains(&"table2".to_string()));
7062        assert!(!list_response.tables.contains(&"table1".to_string()));
7063    }
7064
7065    // ============================================================
7066    // Tests for describe_table and table_exists with deregistered tables
7067    // ============================================================
7068
7069    #[tokio::test]
7070    async fn test_describe_table_fails_for_deregistered_v1() {
7071        use lance_namespace::models::{DeregisterTableRequest, DescribeTableRequest};
7072
7073        let temp_dir = TempStdDir::default();
7074        let temp_path = temp_dir.to_str().unwrap();
7075
7076        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7077            .manifest_enabled(false)
7078            .dir_listing_enabled(true)
7079            .build()
7080            .await
7081            .unwrap();
7082
7083        // Create a table
7084        let schema = create_test_schema();
7085        let ipc_data = create_test_ipc_data(&schema);
7086        let mut create_req = CreateTableRequest::new();
7087        create_req.id = Some(vec!["test_table".to_string()]);
7088        namespace
7089            .create_table(create_req, bytes::Bytes::from(ipc_data))
7090            .await
7091            .unwrap();
7092
7093        // Describe should work before deregistration
7094        let mut describe_req = DescribeTableRequest::new();
7095        describe_req.id = Some(vec!["test_table".to_string()]);
7096        assert!(namespace.describe_table(describe_req.clone()).await.is_ok());
7097
7098        // Deregister
7099        let mut deregister_req = DeregisterTableRequest::new();
7100        deregister_req.id = Some(vec!["test_table".to_string()]);
7101        namespace.deregister_table(deregister_req).await.unwrap();
7102
7103        // Describe should fail after deregistration
7104        let result = namespace.describe_table(describe_req).await;
7105        assert!(result.is_err());
7106        let err = result.unwrap_err();
7107        assert!(matches!(err, Error::Namespace { .. }));
7108        let err_msg = err.to_string();
7109        assert!(err_msg.contains("deregistered"));
7110        assert!(err_msg.contains("table id 'test_table'"));
7111    }
7112
7113    #[tokio::test]
7114    async fn test_table_exists_fails_for_deregistered_v1() {
7115        use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest};
7116
7117        let temp_dir = TempStdDir::default();
7118        let temp_path = temp_dir.to_str().unwrap();
7119
7120        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7121            .manifest_enabled(false)
7122            .dir_listing_enabled(true)
7123            .build()
7124            .await
7125            .unwrap();
7126
7127        // Create a table
7128        let schema = create_test_schema();
7129        let ipc_data = create_test_ipc_data(&schema);
7130        let mut create_req = CreateTableRequest::new();
7131        create_req.id = Some(vec!["test_table".to_string()]);
7132        namespace
7133            .create_table(create_req, bytes::Bytes::from(ipc_data))
7134            .await
7135            .unwrap();
7136
7137        // Table exists should work before deregistration
7138        let mut exists_req = TableExistsRequest::new();
7139        exists_req.id = Some(vec!["test_table".to_string()]);
7140        assert!(namespace.table_exists(exists_req.clone()).await.is_ok());
7141
7142        // Deregister
7143        let mut deregister_req = DeregisterTableRequest::new();
7144        deregister_req.id = Some(vec!["test_table".to_string()]);
7145        namespace.deregister_table(deregister_req).await.unwrap();
7146
7147        // Table exists should fail after deregistration
7148        let result = namespace.table_exists(exists_req).await;
7149        assert!(result.is_err());
7150        let err = result.unwrap_err();
7151        assert!(matches!(err, Error::Namespace { .. }));
7152        let err_msg = err.to_string();
7153        assert!(err_msg.contains("deregistered"));
7154        assert!(err_msg.contains("table id 'test_table'"));
7155    }
7156
7157    #[tokio::test]
7158    async fn test_atomic_table_status_check() {
7159        // This test verifies that the TableStatus check is atomic
7160        // by ensuring a single directory listing is used
7161
7162        let temp_dir = TempStdDir::default();
7163        let temp_path = temp_dir.to_str().unwrap();
7164
7165        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7166            .manifest_enabled(false)
7167            .dir_listing_enabled(true)
7168            .build()
7169            .await
7170            .unwrap();
7171
7172        // Create a table
7173        let schema = create_test_schema();
7174        let ipc_data = create_test_ipc_data(&schema);
7175        let mut create_req = CreateTableRequest::new();
7176        create_req.id = Some(vec!["test_table".to_string()]);
7177        namespace
7178            .create_table(create_req, bytes::Bytes::from(ipc_data))
7179            .await
7180            .unwrap();
7181
7182        // Table status should show exists=true, is_deregistered=false
7183        let status = namespace.check_table_status("test_table").await;
7184        assert!(status.exists);
7185        assert!(!status.is_deregistered);
7186        assert!(!status.has_reserved_file);
7187    }
7188
7189    #[tokio::test]
7190    async fn test_table_version_tracking_enabled_managed_versioning() {
7191        use lance_namespace::models::DescribeTableRequest;
7192
7193        let temp_dir = TempStdDir::default();
7194        let temp_path = temp_dir.to_str().unwrap();
7195
7196        // Create namespace with table_version_tracking_enabled=true
7197        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7198            .table_version_tracking_enabled(true)
7199            .build()
7200            .await
7201            .unwrap();
7202
7203        // Create a table
7204        let schema = create_test_schema();
7205        let ipc_data = create_test_ipc_data(&schema);
7206        let mut create_req = CreateTableRequest::new();
7207        create_req.id = Some(vec!["test_table".to_string()]);
7208        namespace
7209            .create_table(create_req, bytes::Bytes::from(ipc_data))
7210            .await
7211            .unwrap();
7212
7213        // Describe table should return managed_versioning=true
7214        let mut describe_req = DescribeTableRequest::new();
7215        describe_req.id = Some(vec!["test_table".to_string()]);
7216        let describe_resp = namespace.describe_table(describe_req).await.unwrap();
7217
7218        // managed_versioning should be true
7219        assert_eq!(
7220            describe_resp.managed_versioning,
7221            Some(true),
7222            "managed_versioning should be true when table_version_tracking_enabled=true"
7223        );
7224    }
7225
7226    #[tokio::test]
7227    async fn test_table_version_tracking_disabled_no_managed_versioning() {
7228        use lance_namespace::models::DescribeTableRequest;
7229
7230        let temp_dir = TempStdDir::default();
7231        let temp_path = temp_dir.to_str().unwrap();
7232
7233        // Create namespace with table_version_tracking_enabled=false (default)
7234        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7235            .table_version_tracking_enabled(false)
7236            .build()
7237            .await
7238            .unwrap();
7239
7240        // Create a table
7241        let schema = create_test_schema();
7242        let ipc_data = create_test_ipc_data(&schema);
7243        let mut create_req = CreateTableRequest::new();
7244        create_req.id = Some(vec!["test_table".to_string()]);
7245        namespace
7246            .create_table(create_req, bytes::Bytes::from(ipc_data))
7247            .await
7248            .unwrap();
7249
7250        // Describe table should not have managed_versioning set
7251        let mut describe_req = DescribeTableRequest::new();
7252        describe_req.id = Some(vec!["test_table".to_string()]);
7253        let describe_resp = namespace.describe_table(describe_req).await.unwrap();
7254
7255        // managed_versioning should be None when table_version_tracking_enabled=false
7256        assert!(
7257            describe_resp.managed_versioning.is_none(),
7258            "managed_versioning should be None when table_version_tracking_enabled=false, got: {:?}",
7259            describe_resp.managed_versioning
7260        );
7261    }
7262
7263    #[tokio::test]
7264    async fn test_list_table_versions() {
7265        use arrow::array::{Int32Array, RecordBatchIterator};
7266        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7267        use arrow::record_batch::RecordBatch;
7268        use lance::dataset::{Dataset, WriteMode, WriteParams};
7269        use lance_namespace::models::{CreateNamespaceRequest, ListTableVersionsRequest};
7270
7271        let temp_dir = TempStrDir::default();
7272        let temp_path: &str = &temp_dir;
7273
7274        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7275            DirectoryNamespaceBuilder::new(temp_path)
7276                .table_version_tracking_enabled(true)
7277                .build()
7278                .await
7279                .unwrap(),
7280        );
7281
7282        // Create parent namespace first
7283        let mut create_ns_req = CreateNamespaceRequest::new();
7284        create_ns_req.id = Some(vec!["workspace".to_string()]);
7285        namespace.create_namespace(create_ns_req).await.unwrap();
7286
7287        // Create a table using write_into_namespace (version 1)
7288        let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7289        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7290            "id",
7291            DataType::Int32,
7292            false,
7293        )]));
7294        let batch = RecordBatch::try_new(
7295            arrow_schema.clone(),
7296            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7297        )
7298        .unwrap();
7299        let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone());
7300        let write_params = WriteParams {
7301            mode: WriteMode::Create,
7302            ..Default::default()
7303        };
7304        let mut dataset = Dataset::write_into_namespace(
7305            batches,
7306            namespace.clone(),
7307            table_id.clone(),
7308            Some(write_params),
7309        )
7310        .await
7311        .unwrap();
7312
7313        // Append to create version 2
7314        let batch2 = RecordBatch::try_new(
7315            arrow_schema.clone(),
7316            vec![Arc::new(Int32Array::from(vec![100, 200]))],
7317        )
7318        .unwrap();
7319        let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone());
7320        dataset.append(batches, None).await.unwrap();
7321
7322        // Append to create version 3
7323        let batch3 = RecordBatch::try_new(
7324            arrow_schema.clone(),
7325            vec![Arc::new(Int32Array::from(vec![300, 400]))],
7326        )
7327        .unwrap();
7328        let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema);
7329        dataset.append(batches, None).await.unwrap();
7330
7331        // List versions - should have versions 1, 2, and 3
7332        let mut list_req = ListTableVersionsRequest::new();
7333        list_req.id = Some(table_id.clone());
7334        let list_resp = namespace.list_table_versions(list_req).await.unwrap();
7335
7336        assert_eq!(
7337            list_resp.versions.len(),
7338            3,
7339            "Should have 3 versions, got: {:?}",
7340            list_resp.versions
7341        );
7342
7343        // Verify each version
7344        for expected_version in 1..=3 {
7345            let version = list_resp
7346                .versions
7347                .iter()
7348                .find(|v| v.version == expected_version)
7349                .unwrap_or_else(|| panic!("Expected version {}", expected_version));
7350
7351            assert!(
7352                !version.manifest_path.is_empty(),
7353                "manifest_path should be set for version {}",
7354                expected_version
7355            );
7356            assert!(
7357                version.manifest_path.contains(".manifest"),
7358                "manifest_path should contain .manifest for version {}",
7359                expected_version
7360            );
7361            assert!(
7362                version.manifest_size.is_some(),
7363                "manifest_size should be set for version {}",
7364                expected_version
7365            );
7366            assert!(
7367                version.manifest_size.unwrap() > 0,
7368                "manifest_size should be > 0 for version {}",
7369                expected_version
7370            );
7371            assert!(
7372                version.timestamp_millis.is_some(),
7373                "timestamp_millis should be set for version {}",
7374                expected_version
7375            );
7376        }
7377    }
7378
7379    #[tokio::test]
7380    async fn test_describe_table_version() {
7381        use arrow::array::{Int32Array, RecordBatchIterator};
7382        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7383        use arrow::record_batch::RecordBatch;
7384        use lance::dataset::{Dataset, WriteMode, WriteParams};
7385        use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest};
7386
7387        let temp_dir = TempStrDir::default();
7388        let temp_path: &str = &temp_dir;
7389
7390        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7391            DirectoryNamespaceBuilder::new(temp_path)
7392                .table_version_tracking_enabled(true)
7393                .build()
7394                .await
7395                .unwrap(),
7396        );
7397
7398        // Create parent namespace first
7399        let mut create_ns_req = CreateNamespaceRequest::new();
7400        create_ns_req.id = Some(vec!["workspace".to_string()]);
7401        namespace.create_namespace(create_ns_req).await.unwrap();
7402
7403        // Create a table using write_into_namespace (version 1)
7404        let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7405        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7406            "id",
7407            DataType::Int32,
7408            false,
7409        )]));
7410        let batch = RecordBatch::try_new(
7411            arrow_schema.clone(),
7412            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7413        )
7414        .unwrap();
7415        let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
7416        let write_params = WriteParams {
7417            mode: WriteMode::Create,
7418            ..Default::default()
7419        };
7420        let mut dataset = Dataset::write_into_namespace(
7421            batches,
7422            namespace.clone(),
7423            table_id.clone(),
7424            Some(write_params),
7425        )
7426        .await
7427        .unwrap();
7428
7429        // Append data to create version 2
7430        let batch2 = RecordBatch::try_new(
7431            arrow_schema.clone(),
7432            vec![Arc::new(Int32Array::from(vec![100, 200]))],
7433        )
7434        .unwrap();
7435        let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema);
7436        dataset.append(batches, None).await.unwrap();
7437
7438        // Describe version 1
7439        let mut describe_req = DescribeTableVersionRequest::new();
7440        describe_req.id = Some(table_id.clone());
7441        describe_req.version = Some(1);
7442        let describe_resp = namespace
7443            .describe_table_version(describe_req)
7444            .await
7445            .unwrap();
7446
7447        let version = &describe_resp.version;
7448        assert_eq!(version.version, 1);
7449        assert!(version.timestamp_millis.is_some());
7450        assert!(
7451            !version.manifest_path.is_empty(),
7452            "manifest_path should be set"
7453        );
7454        assert!(
7455            version.manifest_path.contains(".manifest"),
7456            "manifest_path should contain .manifest"
7457        );
7458        assert!(
7459            version.manifest_size.is_some(),
7460            "manifest_size should be set"
7461        );
7462        assert!(
7463            version.manifest_size.unwrap() > 0,
7464            "manifest_size should be > 0"
7465        );
7466
7467        // Describe version 2
7468        let mut describe_req = DescribeTableVersionRequest::new();
7469        describe_req.id = Some(table_id.clone());
7470        describe_req.version = Some(2);
7471        let describe_resp = namespace
7472            .describe_table_version(describe_req)
7473            .await
7474            .unwrap();
7475
7476        let version = &describe_resp.version;
7477        assert_eq!(version.version, 2);
7478        assert!(version.timestamp_millis.is_some());
7479        assert!(
7480            !version.manifest_path.is_empty(),
7481            "manifest_path should be set"
7482        );
7483        assert!(
7484            version.manifest_size.is_some(),
7485            "manifest_size should be set"
7486        );
7487        assert!(
7488            version.manifest_size.unwrap() > 0,
7489            "manifest_size should be > 0"
7490        );
7491    }
7492
7493    #[tokio::test]
7494    async fn test_describe_table_version_latest() {
7495        use arrow::array::{Int32Array, RecordBatchIterator};
7496        use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
7497        use arrow::record_batch::RecordBatch;
7498        use lance::dataset::{Dataset, WriteMode, WriteParams};
7499        use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest};
7500
7501        let temp_dir = TempStrDir::default();
7502        let temp_path: &str = &temp_dir;
7503
7504        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7505            DirectoryNamespaceBuilder::new(temp_path)
7506                .table_version_tracking_enabled(true)
7507                .build()
7508                .await
7509                .unwrap(),
7510        );
7511
7512        // Create parent namespace first
7513        let mut create_ns_req = CreateNamespaceRequest::new();
7514        create_ns_req.id = Some(vec!["workspace".to_string()]);
7515        namespace.create_namespace(create_ns_req).await.unwrap();
7516
7517        // Create a table using write_into_namespace (version 1)
7518        let table_id = vec!["workspace".to_string(), "test_table".to_string()];
7519        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
7520            "id",
7521            DataType::Int32,
7522            false,
7523        )]));
7524        let batch = RecordBatch::try_new(
7525            arrow_schema.clone(),
7526            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
7527        )
7528        .unwrap();
7529        let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
7530        let write_params = WriteParams {
7531            mode: WriteMode::Create,
7532            ..Default::default()
7533        };
7534        let mut dataset = Dataset::write_into_namespace(
7535            batches,
7536            namespace.clone(),
7537            table_id.clone(),
7538            Some(write_params),
7539        )
7540        .await
7541        .unwrap();
7542
7543        // Append to create version 2
7544        let batch2 = RecordBatch::try_new(
7545            arrow_schema.clone(),
7546            vec![Arc::new(Int32Array::from(vec![100, 200]))],
7547        )
7548        .unwrap();
7549        let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone());
7550        dataset.append(batches, None).await.unwrap();
7551
7552        // Append to create version 3
7553        let batch3 = RecordBatch::try_new(
7554            arrow_schema.clone(),
7555            vec![Arc::new(Int32Array::from(vec![300, 400]))],
7556        )
7557        .unwrap();
7558        let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema);
7559        dataset.append(batches, None).await.unwrap();
7560
7561        // Describe latest version (no version specified)
7562        let mut describe_req = DescribeTableVersionRequest::new();
7563        describe_req.id = Some(table_id.clone());
7564        describe_req.version = None;
7565        let describe_resp = namespace
7566            .describe_table_version(describe_req)
7567            .await
7568            .unwrap();
7569
7570        // Should return version 3 as it's the latest
7571        assert_eq!(describe_resp.version.version, 3);
7572    }
7573
7574    #[tokio::test]
7575    async fn test_create_table_version() {
7576        use futures::TryStreamExt;
7577        use lance::dataset::builder::DatasetBuilder;
7578        use lance_namespace::models::CreateTableVersionRequest;
7579
7580        let temp_dir = TempStrDir::default();
7581        let temp_path: &str = &temp_dir;
7582
7583        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7584            DirectoryNamespaceBuilder::new(temp_path)
7585                .table_version_tracking_enabled(true)
7586                .build()
7587                .await
7588                .unwrap(),
7589        );
7590
7591        // Create a table
7592        let schema = create_test_schema();
7593        let ipc_data = create_test_ipc_data(&schema);
7594        let mut create_req = CreateTableRequest::new();
7595        create_req.id = Some(vec!["test_table".to_string()]);
7596        namespace
7597            .create_table(create_req, bytes::Bytes::from(ipc_data))
7598            .await
7599            .unwrap();
7600
7601        // Open the dataset using from_namespace to get proper object_store and paths
7602        let table_id = vec!["test_table".to_string()];
7603        let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
7604            .await
7605            .unwrap()
7606            .load()
7607            .await
7608            .unwrap();
7609
7610        // Use dataset's object_store to find and copy the manifest
7611        let versions_path = dataset.versions_dir();
7612        let manifest_metas: Vec<_> = dataset
7613            .object_store()
7614            .inner
7615            .list(Some(&versions_path))
7616            .try_collect()
7617            .await
7618            .unwrap();
7619
7620        let manifest_meta = manifest_metas
7621            .iter()
7622            .find(|m| {
7623                m.location
7624                    .filename()
7625                    .map(|f| f.ends_with(".manifest"))
7626                    .unwrap_or(false)
7627            })
7628            .expect("No manifest file found");
7629
7630        // Read the existing manifest data
7631        let manifest_data = dataset
7632            .object_store()
7633            .inner
7634            .get(&manifest_meta.location)
7635            .await
7636            .unwrap()
7637            .bytes()
7638            .await
7639            .unwrap();
7640
7641        // Write to a staging location using the dataset's object_store
7642        let staging_path = dataset.versions_dir().child("staging_manifest");
7643        dataset
7644            .object_store()
7645            .inner
7646            .put(&staging_path, manifest_data.into())
7647            .await
7648            .unwrap();
7649
7650        // Create version 2 from staging manifest
7651        // Use the same naming scheme as the existing dataset (V2)
7652        let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7653        create_version_req.id = Some(table_id.clone());
7654        create_version_req.naming_scheme = Some("V2".to_string());
7655
7656        let result = namespace.create_table_version(create_version_req).await;
7657        assert!(
7658            result.is_ok(),
7659            "create_table_version should succeed: {:?}",
7660            result
7661        );
7662
7663        // Verify version 2 was created at the path returned in the response
7664        let response = result.unwrap();
7665        let version_info = response
7666            .version
7667            .expect("response should contain version info");
7668        let version_2_path = Path::parse(&version_info.manifest_path).unwrap();
7669        let head_result = dataset.object_store().inner.head(&version_2_path).await;
7670        assert!(
7671            head_result.is_ok(),
7672            "Version 2 manifest should exist at {}",
7673            version_2_path
7674        );
7675
7676        // Verify the staging file has been deleted
7677        let staging_head_result = dataset.object_store().inner.head(&staging_path).await;
7678        assert!(
7679            staging_head_result.is_err(),
7680            "Staging manifest should have been deleted after create_table_version"
7681        );
7682    }
7683
7684    #[tokio::test]
7685    async fn test_create_table_version_conflict() {
7686        // create_table_version should fail if the version already exists.
7687        // Each version always writes to a new file location.
7688        use futures::TryStreamExt;
7689        use lance::dataset::builder::DatasetBuilder;
7690        use lance_namespace::models::CreateTableVersionRequest;
7691
7692        let temp_dir = TempStrDir::default();
7693        let temp_path: &str = &temp_dir;
7694
7695        let namespace: Arc<dyn LanceNamespace> = Arc::new(
7696            DirectoryNamespaceBuilder::new(temp_path)
7697                .table_version_tracking_enabled(true)
7698                .build()
7699                .await
7700                .unwrap(),
7701        );
7702
7703        // Create a table
7704        let schema = create_test_schema();
7705        let ipc_data = create_test_ipc_data(&schema);
7706        let mut create_req = CreateTableRequest::new();
7707        create_req.id = Some(vec!["test_table".to_string()]);
7708        namespace
7709            .create_table(create_req, bytes::Bytes::from(ipc_data))
7710            .await
7711            .unwrap();
7712
7713        // Open the dataset using from_namespace to get proper object_store and paths
7714        let table_id = vec!["test_table".to_string()];
7715        let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
7716            .await
7717            .unwrap()
7718            .load()
7719            .await
7720            .unwrap();
7721
7722        // Use dataset's object_store to find and copy the manifest
7723        let versions_path = dataset.versions_dir();
7724        let manifest_metas: Vec<_> = dataset
7725            .object_store()
7726            .inner
7727            .list(Some(&versions_path))
7728            .try_collect()
7729            .await
7730            .unwrap();
7731
7732        let manifest_meta = manifest_metas
7733            .iter()
7734            .find(|m| {
7735                m.location
7736                    .filename()
7737                    .map(|f| f.ends_with(".manifest"))
7738                    .unwrap_or(false)
7739            })
7740            .expect("No manifest file found");
7741
7742        // Read the existing manifest data
7743        let manifest_data = dataset
7744            .object_store()
7745            .inner
7746            .get(&manifest_meta.location)
7747            .await
7748            .unwrap()
7749            .bytes()
7750            .await
7751            .unwrap();
7752
7753        // Write to a staging location using the dataset's object_store
7754        let staging_path = dataset.versions_dir().child("staging_manifest");
7755        dataset
7756            .object_store()
7757            .inner
7758            .put(&staging_path, manifest_data.into())
7759            .await
7760            .unwrap();
7761
7762        // First create version 2 (should succeed)
7763        let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7764        create_version_req.id = Some(table_id.clone());
7765        create_version_req.naming_scheme = Some("V2".to_string());
7766        let first_result = namespace.create_table_version(create_version_req).await;
7767        assert!(
7768            first_result.is_ok(),
7769            "First create_table_version for version 2 should succeed: {:?}",
7770            first_result
7771        );
7772
7773        // Get the path from the response for verification
7774        let version_2_path = Path::parse(
7775            &first_result
7776                .unwrap()
7777                .version
7778                .expect("response should contain version info")
7779                .manifest_path,
7780        )
7781        .unwrap();
7782
7783        // Create version 2 again (should fail - conflict)
7784        let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string());
7785        create_version_req.id = Some(table_id.clone());
7786        create_version_req.naming_scheme = Some("V2".to_string());
7787
7788        let result = namespace.create_table_version(create_version_req).await;
7789        assert!(
7790            result.is_err(),
7791            "create_table_version should fail for existing version"
7792        );
7793
7794        // Verify version 2 still exists using the dataset's object_store
7795        let head_result = dataset.object_store().inner.head(&version_2_path).await;
7796        assert!(
7797            head_result.is_ok(),
7798            "Version 2 manifest should still exist at {}",
7799            version_2_path
7800        );
7801    }
7802
7803    #[tokio::test]
7804    async fn test_create_table_version_table_not_found() {
7805        use lance_namespace::models::CreateTableVersionRequest;
7806
7807        let temp_dir = TempStdDir::default();
7808        let temp_path = temp_dir.to_str().unwrap();
7809
7810        let namespace = DirectoryNamespaceBuilder::new(temp_path)
7811            .table_version_tracking_enabled(true)
7812            .build()
7813            .await
7814            .unwrap();
7815
7816        // Try to create version for non-existent table
7817        let mut create_version_req =
7818            CreateTableVersionRequest::new(1, "/some/staging/path".to_string());
7819        create_version_req.id = Some(vec!["non_existent_table".to_string()]);
7820
7821        let result = namespace.create_table_version(create_version_req).await;
7822        assert!(
7823            result.is_err(),
7824            "create_table_version should fail for non-existent table"
7825        );
7826        let err_msg = result.unwrap_err().to_string();
7827        assert!(
7828            err_msg.contains("Table not found"),
7829            "Error should mention table not found, got: {}",
7830            err_msg
7831        );
7832    }
7833
7834    /// End-to-end integration test module for table version tracking.
7835    mod e2e_table_version_tracking {
7836        use super::*;
7837        use std::sync::atomic::{AtomicUsize, Ordering};
7838
7839        /// Tracking wrapper around a namespace that counts method invocations.
7840        struct TrackingNamespace {
7841            inner: DirectoryNamespace,
7842            create_table_version_count: AtomicUsize,
7843            describe_table_version_count: AtomicUsize,
7844            list_table_versions_count: AtomicUsize,
7845        }
7846
7847        impl TrackingNamespace {
7848            fn new(inner: DirectoryNamespace) -> Self {
7849                Self {
7850                    inner,
7851                    create_table_version_count: AtomicUsize::new(0),
7852                    describe_table_version_count: AtomicUsize::new(0),
7853                    list_table_versions_count: AtomicUsize::new(0),
7854                }
7855            }
7856
7857            fn create_table_version_calls(&self) -> usize {
7858                self.create_table_version_count.load(Ordering::SeqCst)
7859            }
7860
7861            fn describe_table_version_calls(&self) -> usize {
7862                self.describe_table_version_count.load(Ordering::SeqCst)
7863            }
7864
7865            fn list_table_versions_calls(&self) -> usize {
7866                self.list_table_versions_count.load(Ordering::SeqCst)
7867            }
7868        }
7869
7870        impl std::fmt::Debug for TrackingNamespace {
7871            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7872                f.debug_struct("TrackingNamespace")
7873                    .field(
7874                        "create_table_version_calls",
7875                        &self.create_table_version_calls(),
7876                    )
7877                    .finish()
7878            }
7879        }
7880
7881        #[async_trait]
7882        impl LanceNamespace for TrackingNamespace {
7883            async fn create_namespace(
7884                &self,
7885                request: CreateNamespaceRequest,
7886            ) -> Result<CreateNamespaceResponse> {
7887                self.inner.create_namespace(request).await
7888            }
7889
7890            async fn describe_namespace(
7891                &self,
7892                request: DescribeNamespaceRequest,
7893            ) -> Result<DescribeNamespaceResponse> {
7894                self.inner.describe_namespace(request).await
7895            }
7896
7897            async fn namespace_exists(&self, request: NamespaceExistsRequest) -> Result<()> {
7898                self.inner.namespace_exists(request).await
7899            }
7900
7901            async fn list_namespaces(
7902                &self,
7903                request: ListNamespacesRequest,
7904            ) -> Result<ListNamespacesResponse> {
7905                self.inner.list_namespaces(request).await
7906            }
7907
7908            async fn drop_namespace(
7909                &self,
7910                request: DropNamespaceRequest,
7911            ) -> Result<DropNamespaceResponse> {
7912                self.inner.drop_namespace(request).await
7913            }
7914
7915            async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
7916                self.inner.list_tables(request).await
7917            }
7918
7919            async fn describe_table(
7920                &self,
7921                request: DescribeTableRequest,
7922            ) -> Result<DescribeTableResponse> {
7923                self.inner.describe_table(request).await
7924            }
7925
7926            async fn table_exists(&self, request: TableExistsRequest) -> Result<()> {
7927                self.inner.table_exists(request).await
7928            }
7929
7930            async fn drop_table(&self, request: DropTableRequest) -> Result<DropTableResponse> {
7931                self.inner.drop_table(request).await
7932            }
7933
7934            async fn create_table(
7935                &self,
7936                request: CreateTableRequest,
7937                request_data: Bytes,
7938            ) -> Result<CreateTableResponse> {
7939                self.inner.create_table(request, request_data).await
7940            }
7941
7942            async fn declare_table(
7943                &self,
7944                request: DeclareTableRequest,
7945            ) -> Result<DeclareTableResponse> {
7946                self.inner.declare_table(request).await
7947            }
7948
7949            async fn list_table_versions(
7950                &self,
7951                request: ListTableVersionsRequest,
7952            ) -> Result<ListTableVersionsResponse> {
7953                self.list_table_versions_count
7954                    .fetch_add(1, Ordering::SeqCst);
7955                self.inner.list_table_versions(request).await
7956            }
7957
7958            async fn create_table_version(
7959                &self,
7960                request: CreateTableVersionRequest,
7961            ) -> Result<CreateTableVersionResponse> {
7962                self.create_table_version_count
7963                    .fetch_add(1, Ordering::SeqCst);
7964                self.inner.create_table_version(request).await
7965            }
7966
7967            async fn describe_table_version(
7968                &self,
7969                request: DescribeTableVersionRequest,
7970            ) -> Result<DescribeTableVersionResponse> {
7971                self.describe_table_version_count
7972                    .fetch_add(1, Ordering::SeqCst);
7973                self.inner.describe_table_version(request).await
7974            }
7975
7976            async fn batch_delete_table_versions(
7977                &self,
7978                request: BatchDeleteTableVersionsRequest,
7979            ) -> Result<BatchDeleteTableVersionsResponse> {
7980                self.inner.batch_delete_table_versions(request).await
7981            }
7982
7983            fn namespace_id(&self) -> String {
7984                self.inner.namespace_id()
7985            }
7986        }
7987
7988        #[tokio::test]
7989        async fn test_describe_table_returns_managed_versioning() {
7990            use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest};
7991
7992            let temp_dir = TempStdDir::default();
7993            let temp_path = temp_dir.to_str().unwrap();
7994
7995            // Create namespace with table_version_tracking_enabled and manifest_enabled
7996            let ns = DirectoryNamespaceBuilder::new(temp_path)
7997                .table_version_tracking_enabled(true)
7998                .manifest_enabled(true)
7999                .build()
8000                .await
8001                .unwrap();
8002
8003            // Create parent namespace
8004            let mut create_ns_req = CreateNamespaceRequest::new();
8005            create_ns_req.id = Some(vec!["workspace".to_string()]);
8006            ns.create_namespace(create_ns_req).await.unwrap();
8007
8008            // Create a table with multi-level ID (namespace + table)
8009            let schema = create_test_schema();
8010            let ipc_data = create_test_ipc_data(&schema);
8011            let mut create_req = CreateTableRequest::new();
8012            create_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]);
8013            ns.create_table(create_req, bytes::Bytes::from(ipc_data))
8014                .await
8015                .unwrap();
8016
8017            // Describe table should return managed_versioning=true
8018            let mut describe_req = DescribeTableRequest::new();
8019            describe_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]);
8020            let describe_resp = ns.describe_table(describe_req).await.unwrap();
8021
8022            // managed_versioning should be true
8023            assert_eq!(
8024                describe_resp.managed_versioning,
8025                Some(true),
8026                "managed_versioning should be true when table_version_tracking_enabled=true"
8027            );
8028        }
8029
8030        #[tokio::test]
8031        async fn test_external_manifest_store_invokes_namespace_apis() {
8032            use arrow::array::{Int32Array, StringArray};
8033            use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
8034            use arrow::record_batch::RecordBatch;
8035            use lance::Dataset;
8036            use lance::dataset::builder::DatasetBuilder;
8037            use lance::dataset::{WriteMode, WriteParams};
8038            use lance_namespace::models::CreateNamespaceRequest;
8039
8040            let temp_dir = TempStdDir::default();
8041            let temp_path = temp_dir.to_str().unwrap();
8042
8043            // Create namespace with table_version_tracking_enabled and manifest_enabled
8044            let inner_ns = DirectoryNamespaceBuilder::new(temp_path)
8045                .table_version_tracking_enabled(true)
8046                .manifest_enabled(true)
8047                .build()
8048                .await
8049                .unwrap();
8050
8051            let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns));
8052            let ns: Arc<dyn LanceNamespace> = tracking_ns.clone();
8053
8054            // Create parent namespace
8055            let mut create_ns_req = CreateNamespaceRequest::new();
8056            create_ns_req.id = Some(vec!["workspace".to_string()]);
8057            ns.create_namespace(create_ns_req).await.unwrap();
8058
8059            // Create a table with multi-level ID (namespace + table)
8060            let table_id = vec!["workspace".to_string(), "test_table".to_string()];
8061
8062            // Create some initial data
8063            let arrow_schema = Arc::new(ArrowSchema::new(vec![
8064                Field::new("id", DataType::Int32, false),
8065                Field::new("name", DataType::Utf8, true),
8066            ]));
8067            let batch = RecordBatch::try_new(
8068                arrow_schema.clone(),
8069                vec![
8070                    Arc::new(Int32Array::from(vec![1, 2, 3])),
8071                    Arc::new(StringArray::from(vec!["a", "b", "c"])),
8072                ],
8073            )
8074            .unwrap();
8075
8076            // Create a table using write_into_namespace
8077            let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone());
8078            let write_params = WriteParams {
8079                mode: WriteMode::Create,
8080                ..Default::default()
8081            };
8082            let mut dataset = Dataset::write_into_namespace(
8083                batches,
8084                ns.clone(),
8085                table_id.clone(),
8086                Some(write_params),
8087            )
8088            .await
8089            .unwrap();
8090            assert_eq!(dataset.version().version, 1);
8091
8092            // Verify create_table_version was called once during initial write_into_namespace
8093            assert_eq!(
8094                tracking_ns.create_table_version_calls(),
8095                1,
8096                "create_table_version should have been called once during initial write_into_namespace"
8097            );
8098
8099            // Append data - this should call create_table_version again
8100            let append_batch = RecordBatch::try_new(
8101                arrow_schema.clone(),
8102                vec![
8103                    Arc::new(Int32Array::from(vec![4, 5, 6])),
8104                    Arc::new(StringArray::from(vec!["d", "e", "f"])),
8105                ],
8106            )
8107            .unwrap();
8108            let append_batches = RecordBatchIterator::new(vec![Ok(append_batch)], arrow_schema);
8109            dataset.append(append_batches, None).await.unwrap();
8110
8111            assert_eq!(
8112                tracking_ns.create_table_version_calls(),
8113                2,
8114                "create_table_version should have been called twice (once for create, once for append)"
8115            );
8116
8117            // checkout_latest should call list_table_versions exactly once
8118            let initial_list_calls = tracking_ns.list_table_versions_calls();
8119            let latest_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone())
8120                .await
8121                .unwrap()
8122                .load()
8123                .await
8124                .unwrap();
8125            assert_eq!(latest_dataset.version().version, 2);
8126            assert_eq!(
8127                tracking_ns.list_table_versions_calls(),
8128                initial_list_calls + 1,
8129                "list_table_versions should have been called exactly once during checkout_latest"
8130            );
8131
8132            // checkout to specific version should call describe_table_version exactly once
8133            let initial_describe_calls = tracking_ns.describe_table_version_calls();
8134            let v1_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone())
8135                .await
8136                .unwrap()
8137                .with_version(1)
8138                .load()
8139                .await
8140                .unwrap();
8141            assert_eq!(v1_dataset.version().version, 1);
8142            assert_eq!(
8143                tracking_ns.describe_table_version_calls(),
8144                initial_describe_calls + 1,
8145                "describe_table_version should have been called exactly once during checkout to version 1"
8146            );
8147        }
8148
8149        #[tokio::test]
8150        async fn test_dataset_commit_with_external_manifest_store() {
8151            use arrow::array::{Int32Array, StringArray};
8152            use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
8153            use arrow::record_batch::RecordBatch;
8154            use futures::TryStreamExt;
8155            use lance::dataset::{Dataset, WriteMode, WriteParams};
8156            use lance_namespace::models::CreateNamespaceRequest;
8157            use lance_table::io::commit::ManifestNamingScheme;
8158
8159            let temp_dir = TempStdDir::default();
8160            let temp_path = temp_dir.to_str().unwrap();
8161
8162            // Create namespace with table_version_tracking_enabled and manifest_enabled
8163            let inner_ns = DirectoryNamespaceBuilder::new(temp_path)
8164                .table_version_tracking_enabled(true)
8165                .manifest_enabled(true)
8166                .build()
8167                .await
8168                .unwrap();
8169
8170            let tracking_ns: Arc<dyn LanceNamespace> = Arc::new(TrackingNamespace::new(inner_ns));
8171
8172            // Create parent namespace
8173            let mut create_ns_req = CreateNamespaceRequest::new();
8174            create_ns_req.id = Some(vec!["workspace".to_string()]);
8175            tracking_ns.create_namespace(create_ns_req).await.unwrap();
8176
8177            // Create a table using write_into_namespace
8178            let table_id = vec!["workspace".to_string(), "test_table".to_string()];
8179            let arrow_schema = Arc::new(ArrowSchema::new(vec![
8180                Field::new("id", DataType::Int32, false),
8181                Field::new("name", DataType::Utf8, true),
8182            ]));
8183            let batch = RecordBatch::try_new(
8184                arrow_schema.clone(),
8185                vec![
8186                    Arc::new(Int32Array::from(vec![1, 2, 3])),
8187                    Arc::new(StringArray::from(vec!["a", "b", "c"])),
8188                ],
8189            )
8190            .unwrap();
8191            let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone());
8192            let write_params = WriteParams {
8193                mode: WriteMode::Create,
8194                ..Default::default()
8195            };
8196            let dataset = Dataset::write_into_namespace(
8197                batches,
8198                tracking_ns.clone(),
8199                table_id.clone(),
8200                Some(write_params),
8201            )
8202            .await
8203            .unwrap();
8204            assert_eq!(dataset.version().version, 1);
8205
8206            // Append data using write_into_namespace (APPEND mode)
8207            let batch2 = RecordBatch::try_new(
8208                arrow_schema.clone(),
8209                vec![
8210                    Arc::new(Int32Array::from(vec![4, 5, 6])),
8211                    Arc::new(StringArray::from(vec!["d", "e", "f"])),
8212                ],
8213            )
8214            .unwrap();
8215            let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema);
8216            let write_params = WriteParams {
8217                mode: WriteMode::Append,
8218                ..Default::default()
8219            };
8220            Dataset::write_into_namespace(
8221                batches,
8222                tracking_ns.clone(),
8223                table_id.clone(),
8224                Some(write_params),
8225            )
8226            .await
8227            .unwrap();
8228
8229            // Verify version 2 was created using the dataset's object_store
8230            // List manifests in the versions directory to find the V2 named manifest
8231            let manifest_metas: Vec<_> = dataset
8232                .object_store()
8233                .inner
8234                .list(Some(&dataset.versions_dir()))
8235                .try_collect()
8236                .await
8237                .unwrap();
8238            let version_2_found = manifest_metas.iter().any(|m| {
8239                m.location
8240                    .filename()
8241                    .map(|f| {
8242                        f.ends_with(".manifest")
8243                            && ManifestNamingScheme::V2.parse_version(f) == Some(2)
8244                    })
8245                    .unwrap_or(false)
8246            });
8247            assert!(
8248                version_2_found,
8249                "Version 2 manifest should exist in versions directory"
8250            );
8251        }
8252
8253        /// Helper: create a namespace and a table with some rows, returning (namespace, table_id)
8254        async fn create_ns_with_table() -> (DirectoryNamespace, TempStdDir, Vec<String>) {
8255            use arrow::array::{Int32Array, StringArray};
8256            use arrow::ipc::writer::StreamWriter;
8257
8258            let (namespace, temp_dir) = create_test_namespace().await;
8259
8260            let schema = create_test_schema();
8261            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8262            let arrow_schema = Arc::new(arrow_schema);
8263
8264            let id_array = Int32Array::from(vec![1, 2, 3]);
8265            let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
8266            let batch = arrow::record_batch::RecordBatch::try_new(
8267                arrow_schema.clone(),
8268                vec![Arc::new(id_array), Arc::new(name_array)],
8269            )
8270            .unwrap();
8271
8272            let mut buffer = Vec::new();
8273            {
8274                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8275                writer.write(&batch).unwrap();
8276                writer.finish().unwrap();
8277            }
8278
8279            let mut request = CreateTableRequest::new();
8280            let table_id = vec!["test_ops_table".to_string()];
8281            request.id = Some(table_id.clone());
8282
8283            namespace
8284                .create_table(request, Bytes::from(buffer))
8285                .await
8286                .unwrap();
8287
8288            (namespace, temp_dir, table_id)
8289        }
8290
8291        #[tokio::test]
8292        async fn test_count_table_rows_basic() {
8293            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8294
8295            let request = CountTableRowsRequest {
8296                id: Some(table_id),
8297                version: None,
8298                predicate: None,
8299                ..Default::default()
8300            };
8301
8302            let count = namespace.count_table_rows(request).await.unwrap();
8303            assert_eq!(count, 3);
8304        }
8305
8306        #[tokio::test]
8307        async fn test_count_table_rows_with_predicate() {
8308            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8309
8310            let request = CountTableRowsRequest {
8311                id: Some(table_id),
8312                version: None,
8313                predicate: Some("id > 1".to_string()),
8314                ..Default::default()
8315            };
8316
8317            let count = namespace.count_table_rows(request).await.unwrap();
8318            assert_eq!(count, 2);
8319        }
8320
8321        #[tokio::test]
8322        async fn test_query_table_invalid_distance_type() {
8323            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8324
8325            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8326                single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8327                multi_vector: None,
8328            });
8329
8330            let request = QueryTableRequest {
8331                id: Some(table_id),
8332                k: 2,
8333                vector,
8334                vector_column: Some("vector".to_string()),
8335                distance_type: Some("invalid_metric".to_string()),
8336                filter: None,
8337                offset: None,
8338                version: None,
8339                ..Default::default()
8340            };
8341
8342            let result = namespace.query_table(request).await;
8343            assert!(result.is_err());
8344            let err_msg = result.unwrap_err().to_string();
8345            assert!(
8346                err_msg.contains("Unknown distance type"),
8347                "Expected error about unknown distance type, got: {}",
8348                err_msg
8349            );
8350        }
8351
8352        #[tokio::test]
8353        async fn test_insert_into_table_append() {
8354            use arrow::array::{Int32Array, StringArray};
8355            use arrow::ipc::writer::StreamWriter;
8356
8357            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8358
8359            // Prepare new data to insert
8360            let schema = create_test_schema();
8361            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8362            let arrow_schema = Arc::new(arrow_schema);
8363
8364            let id_array = Int32Array::from(vec![4, 5]);
8365            let name_array = StringArray::from(vec!["Dave", "Eve"]);
8366            let batch = arrow::record_batch::RecordBatch::try_new(
8367                arrow_schema.clone(),
8368                vec![Arc::new(id_array), Arc::new(name_array)],
8369            )
8370            .unwrap();
8371
8372            let mut buffer = Vec::new();
8373            {
8374                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8375                writer.write(&batch).unwrap();
8376                writer.finish().unwrap();
8377            }
8378
8379            let request = InsertIntoTableRequest {
8380                id: Some(table_id.clone()),
8381                mode: Some("append".to_string()),
8382                ..Default::default()
8383            };
8384
8385            let response = namespace
8386                .insert_into_table(request, Bytes::from(buffer))
8387                .await
8388                .unwrap();
8389            assert!(response.transaction_id.is_none());
8390
8391            // Verify total rows
8392            let count_req = CountTableRowsRequest {
8393                id: Some(table_id),
8394                version: None,
8395                predicate: None,
8396                ..Default::default()
8397            };
8398            let count = namespace.count_table_rows(count_req).await.unwrap();
8399            assert_eq!(count, 5);
8400        }
8401
8402        #[tokio::test]
8403        async fn test_insert_into_table_overwrite() {
8404            use arrow::array::{Int32Array, StringArray};
8405            use arrow::ipc::writer::StreamWriter;
8406
8407            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8408
8409            let schema = create_test_schema();
8410            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8411            let arrow_schema = Arc::new(arrow_schema);
8412
8413            let id_array = Int32Array::from(vec![10, 20]);
8414            let name_array = StringArray::from(vec!["X", "Y"]);
8415            let batch = arrow::record_batch::RecordBatch::try_new(
8416                arrow_schema.clone(),
8417                vec![Arc::new(id_array), Arc::new(name_array)],
8418            )
8419            .unwrap();
8420
8421            let mut buffer = Vec::new();
8422            {
8423                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8424                writer.write(&batch).unwrap();
8425                writer.finish().unwrap();
8426            }
8427
8428            let request = InsertIntoTableRequest {
8429                id: Some(table_id.clone()),
8430                mode: Some("overwrite".to_string()),
8431                ..Default::default()
8432            };
8433
8434            namespace
8435                .insert_into_table(request, Bytes::from(buffer))
8436                .await
8437                .unwrap();
8438
8439            // Verify overwrite: only 2 rows remain
8440            let count_req = CountTableRowsRequest {
8441                id: Some(table_id),
8442                version: None,
8443                predicate: None,
8444                ..Default::default()
8445            };
8446            let count = namespace.count_table_rows(count_req).await.unwrap();
8447            assert_eq!(count, 2);
8448        }
8449
8450        #[tokio::test]
8451        async fn test_insert_into_table_empty_data() {
8452            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8453
8454            let request = InsertIntoTableRequest {
8455                id: Some(table_id),
8456                mode: None,
8457                ..Default::default()
8458            };
8459
8460            let result = namespace.insert_into_table(request, Bytes::new()).await;
8461            assert!(result.is_err());
8462            assert!(
8463                result
8464                    .unwrap_err()
8465                    .to_string()
8466                    .contains("Arrow IPC stream) is required")
8467            );
8468        }
8469
8470        #[tokio::test]
8471        async fn test_insert_into_table_with_storage_options() {
8472            use arrow::array::{Int32Array, StringArray};
8473            use arrow::ipc::writer::StreamWriter;
8474
8475            let temp_dir = TempStdDir::default();
8476
8477            // Build namespace with a (no-op) storage option so self.storage_options is Some
8478            let namespace = DirectoryNamespaceBuilder::new(temp_dir.to_str().unwrap())
8479                .storage_option("allow_http", "true")
8480                .build()
8481                .await
8482                .unwrap();
8483
8484            // Create a table first
8485            let schema = create_test_schema();
8486            let ipc_data = create_test_ipc_data(&schema);
8487            let mut create_req = CreateTableRequest::new();
8488            let table_id = vec!["so_table".to_string()];
8489            create_req.id = Some(table_id.clone());
8490            namespace
8491                .create_table(create_req, Bytes::from(ipc_data))
8492                .await
8493                .unwrap();
8494
8495            // Insert with storage_options present — covers store_params closure
8496            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8497            let arrow_schema = Arc::new(arrow_schema);
8498
8499            let id_array = Int32Array::from(vec![10, 20]);
8500            let name_array = StringArray::from(vec!["X", "Y"]);
8501            let batch = arrow::record_batch::RecordBatch::try_new(
8502                arrow_schema.clone(),
8503                vec![Arc::new(id_array), Arc::new(name_array)],
8504            )
8505            .unwrap();
8506
8507            let mut buffer = Vec::new();
8508            {
8509                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8510                writer.write(&batch).unwrap();
8511                writer.finish().unwrap();
8512            }
8513
8514            let request = InsertIntoTableRequest {
8515                id: Some(table_id.clone()),
8516                mode: Some("append".to_string()),
8517                ..Default::default()
8518            };
8519
8520            let response = namespace
8521                .insert_into_table(request, Bytes::from(buffer))
8522                .await
8523                .unwrap();
8524            assert!(response.transaction_id.is_none());
8525
8526            // Verify rows were inserted
8527            let count_req = CountTableRowsRequest {
8528                id: Some(table_id),
8529                version: None,
8530                predicate: None,
8531                ..Default::default()
8532            };
8533            let count = namespace.count_table_rows(count_req).await.unwrap();
8534            assert_eq!(count, 2);
8535        }
8536
8537        #[tokio::test]
8538        async fn test_query_table_basic() {
8539            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8540
8541            let request = QueryTableRequest {
8542                id: Some(table_id),
8543                k: 10,
8544                filter: None,
8545                offset: None,
8546                version: None,
8547                ..Default::default()
8548            };
8549
8550            let bytes = namespace.query_table(request).await.unwrap();
8551
8552            // Decode IPC and verify
8553            let cursor = Cursor::new(bytes.to_vec());
8554            let reader = FileReader::try_new(cursor, None).unwrap();
8555            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8556            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8557            assert_eq!(total_rows, 3);
8558        }
8559
8560        #[tokio::test]
8561        async fn test_query_table_with_filter() {
8562            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8563
8564            let request = QueryTableRequest {
8565                id: Some(table_id),
8566                k: 10,
8567                filter: Some("id <= 2".to_string()),
8568                offset: None,
8569                version: None,
8570                ..Default::default()
8571            };
8572
8573            let bytes = namespace.query_table(request).await.unwrap();
8574
8575            let cursor = Cursor::new(bytes.to_vec());
8576            let reader = FileReader::try_new(cursor, None).unwrap();
8577            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8578            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8579            assert_eq!(total_rows, 2);
8580        }
8581
8582        #[tokio::test]
8583        async fn test_query_table_with_limit_and_offset() {
8584            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8585
8586            let request = QueryTableRequest {
8587                id: Some(table_id),
8588                k: 2,
8589                filter: None,
8590                offset: Some(1),
8591                version: None,
8592                ..Default::default()
8593            };
8594
8595            let bytes = namespace.query_table(request).await.unwrap();
8596
8597            let cursor = Cursor::new(bytes.to_vec());
8598            let reader = FileReader::try_new(cursor, None).unwrap();
8599            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8600            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8601            assert_eq!(total_rows, 2);
8602        }
8603
8604        #[tokio::test]
8605        async fn test_query_table_no_limit() {
8606            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8607
8608            // k=0 means no limit
8609            let request = QueryTableRequest {
8610                id: Some(table_id),
8611                k: 0,
8612                filter: None,
8613                offset: None,
8614                version: None,
8615                ..Default::default()
8616            };
8617
8618            let bytes = namespace.query_table(request).await.unwrap();
8619
8620            let cursor = Cursor::new(bytes.to_vec());
8621            let reader = FileReader::try_new(cursor, None).unwrap();
8622            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8623            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8624            assert_eq!(total_rows, 3);
8625        }
8626
8627        #[tokio::test]
8628        async fn test_query_table_with_columns() {
8629            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8630
8631            let columns = Box::new(lance_namespace::models::QueryTableRequestColumns {
8632                column_names: Some(vec!["id".to_string()]),
8633                column_aliases: None,
8634            });
8635
8636            let request = QueryTableRequest {
8637                id: Some(table_id),
8638                k: 10,
8639                filter: None,
8640                offset: None,
8641                version: None,
8642                columns: Some(columns),
8643                ..Default::default()
8644            };
8645
8646            let bytes = namespace.query_table(request).await.unwrap();
8647
8648            let cursor = Cursor::new(bytes.to_vec());
8649            let reader = FileReader::try_new(cursor, None).unwrap();
8650            let schema = reader.schema();
8651            assert_eq!(schema.fields().len(), 1);
8652            assert_eq!(schema.field(0).name(), "id");
8653            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8654            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8655            assert_eq!(total_rows, 3);
8656        }
8657
8658        #[tokio::test]
8659        async fn test_count_table_rows_with_version() {
8660            use arrow::array::{Int32Array, StringArray};
8661            use arrow::ipc::writer::StreamWriter;
8662
8663            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8664
8665            // Insert more data to create version 2
8666            let schema = create_test_schema();
8667            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8668            let arrow_schema = Arc::new(arrow_schema);
8669
8670            let id_array = Int32Array::from(vec![4, 5]);
8671            let name_array = StringArray::from(vec!["Dave", "Eve"]);
8672            let batch = arrow::record_batch::RecordBatch::try_new(
8673                arrow_schema.clone(),
8674                vec![Arc::new(id_array), Arc::new(name_array)],
8675            )
8676            .unwrap();
8677
8678            let mut buffer = Vec::new();
8679            {
8680                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8681                writer.write(&batch).unwrap();
8682                writer.finish().unwrap();
8683            }
8684
8685            let request = InsertIntoTableRequest {
8686                id: Some(table_id.clone()),
8687                mode: None,
8688                ..Default::default()
8689            };
8690            namespace
8691                .insert_into_table(request, Bytes::from(buffer))
8692                .await
8693                .unwrap();
8694
8695            // Version 1 should have 3 rows
8696            let count_req = CountTableRowsRequest {
8697                id: Some(table_id.clone()),
8698                version: Some(1),
8699                predicate: None,
8700                ..Default::default()
8701            };
8702            let count = namespace.count_table_rows(count_req).await.unwrap();
8703            assert_eq!(count, 3);
8704
8705            // Latest version should have 5 rows
8706            let count_req = CountTableRowsRequest {
8707                id: Some(table_id),
8708                version: None,
8709                predicate: None,
8710                ..Default::default()
8711            };
8712            let count = namespace.count_table_rows(count_req).await.unwrap();
8713            assert_eq!(count, 5);
8714        }
8715
8716        #[tokio::test]
8717        async fn test_query_table_with_version() {
8718            use arrow::array::{Int32Array, StringArray};
8719            use arrow::ipc::writer::StreamWriter;
8720
8721            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
8722
8723            // Insert more data to create version 2
8724            let schema = create_test_schema();
8725            let arrow_schema = convert_json_arrow_schema(&schema).unwrap();
8726            let arrow_schema = Arc::new(arrow_schema);
8727
8728            let id_array = Int32Array::from(vec![4, 5]);
8729            let name_array = StringArray::from(vec!["Dave", "Eve"]);
8730            let batch = arrow::record_batch::RecordBatch::try_new(
8731                arrow_schema.clone(),
8732                vec![Arc::new(id_array), Arc::new(name_array)],
8733            )
8734            .unwrap();
8735
8736            let mut buffer = Vec::new();
8737            {
8738                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8739                writer.write(&batch).unwrap();
8740                writer.finish().unwrap();
8741            }
8742
8743            let request = InsertIntoTableRequest {
8744                id: Some(table_id.clone()),
8745                mode: None,
8746                ..Default::default()
8747            };
8748            namespace
8749                .insert_into_table(request, Bytes::from(buffer))
8750                .await
8751                .unwrap();
8752
8753            // Query version 1 should return 3 rows
8754            let request = QueryTableRequest {
8755                id: Some(table_id.clone()),
8756                k: 100,
8757                filter: None,
8758                offset: None,
8759                version: Some(1),
8760                ..Default::default()
8761            };
8762
8763            let bytes = namespace.query_table(request).await.unwrap();
8764            let cursor = Cursor::new(bytes.to_vec());
8765            let reader = FileReader::try_new(cursor, None).unwrap();
8766            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8767            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8768            assert_eq!(total_rows, 3);
8769
8770            // Query latest version should return 5 rows
8771            let request = QueryTableRequest {
8772                id: Some(table_id),
8773                k: 100,
8774                filter: None,
8775                offset: None,
8776                version: None,
8777                ..Default::default()
8778            };
8779
8780            let bytes = namespace.query_table(request).await.unwrap();
8781            let cursor = Cursor::new(bytes.to_vec());
8782            let reader = FileReader::try_new(cursor, None).unwrap();
8783            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8784            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8785            assert_eq!(total_rows, 5);
8786        }
8787
8788        /// Helper to create a namespace with a table that has a vector column for
8789        /// vector search tests.
8790        async fn create_ns_with_vector_table() -> (DirectoryNamespace, TempStdDir, Vec<String>) {
8791            use arrow::array::{FixedSizeListArray, Float32Array, Int32Array};
8792            use arrow::ipc::writer::StreamWriter;
8793
8794            let (namespace, temp_dir) = create_test_namespace().await;
8795
8796            // Build schema: id (int32), vector (fixed_size_list<float32>[4])
8797            let arrow_schema = Arc::new(arrow::datatypes::Schema::new(vec![
8798                arrow::datatypes::Field::new("id", arrow::datatypes::DataType::Int32, false),
8799                arrow::datatypes::Field::new(
8800                    "vector",
8801                    arrow::datatypes::DataType::FixedSizeList(
8802                        Arc::new(arrow::datatypes::Field::new(
8803                            "item",
8804                            arrow::datatypes::DataType::Float32,
8805                            true,
8806                        )),
8807                        4,
8808                    ),
8809                    true,
8810                ),
8811            ]));
8812
8813            let id_array = Int32Array::from(vec![1, 2, 3]);
8814            let values = Float32Array::from(vec![
8815                1.0, 0.0, 0.0, 0.0, // vector for id=1
8816                0.0, 1.0, 0.0, 0.0, // vector for id=2
8817                0.0, 0.0, 1.0, 0.0, // vector for id=3
8818            ]);
8819            let vector_array = FixedSizeListArray::try_new(
8820                Arc::new(arrow::datatypes::Field::new(
8821                    "item",
8822                    arrow::datatypes::DataType::Float32,
8823                    true,
8824                )),
8825                4,
8826                Arc::new(values),
8827                None,
8828            )
8829            .unwrap();
8830
8831            let batch = arrow::record_batch::RecordBatch::try_new(
8832                arrow_schema.clone(),
8833                vec![Arc::new(id_array), Arc::new(vector_array)],
8834            )
8835            .unwrap();
8836
8837            let mut buffer = Vec::new();
8838            {
8839                let mut writer = StreamWriter::try_new(&mut buffer, &arrow_schema).unwrap();
8840                writer.write(&batch).unwrap();
8841                writer.finish().unwrap();
8842            }
8843
8844            // Write as a Lance dataset directly
8845            let table_name = "vector_table";
8846            let table_uri = format!("{}/{}.lance", temp_dir.to_str().unwrap(), table_name);
8847            let reader = arrow::record_batch::RecordBatchIterator::new(
8848                vec![Ok(batch)],
8849                arrow_schema.clone(),
8850            );
8851            Dataset::write(reader, &table_uri, None).await.unwrap();
8852
8853            let table_id = vec![table_name.to_string()];
8854            (namespace, temp_dir, table_id)
8855        }
8856
8857        #[tokio::test]
8858        async fn test_query_table_vector_search() {
8859            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8860
8861            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8862                single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8863                multi_vector: None,
8864            });
8865
8866            let request = QueryTableRequest {
8867                id: Some(table_id),
8868                k: 2,
8869                vector,
8870                filter: None,
8871                offset: None,
8872                version: None,
8873                ..Default::default()
8874            };
8875
8876            let bytes = namespace.query_table(request).await.unwrap();
8877
8878            let cursor = Cursor::new(bytes.to_vec());
8879            let reader = FileReader::try_new(cursor, None).unwrap();
8880            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8881            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8882            assert_eq!(total_rows, 2);
8883        }
8884
8885        #[tokio::test]
8886        async fn test_query_table_vector_search_with_distance_type() {
8887            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8888
8889            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8890                single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8891                multi_vector: None,
8892            });
8893
8894            let request = QueryTableRequest {
8895                id: Some(table_id),
8896                k: 3,
8897                vector,
8898                filter: None,
8899                offset: None,
8900                version: None,
8901                distance_type: Some("cosine".to_string()),
8902                ..Default::default()
8903            };
8904
8905            let bytes = namespace.query_table(request).await.unwrap();
8906
8907            let cursor = Cursor::new(bytes.to_vec());
8908            let reader = FileReader::try_new(cursor, None).unwrap();
8909            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8910            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8911            assert_eq!(total_rows, 3);
8912        }
8913
8914        #[tokio::test]
8915        async fn test_query_table_vector_search_with_filter() {
8916            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8917
8918            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8919                single_vector: Some(vec![1.0, 0.0, 0.0, 0.0]),
8920                multi_vector: None,
8921            });
8922
8923            let request = QueryTableRequest {
8924                id: Some(table_id),
8925                k: 10,
8926                vector,
8927                filter: Some("id <= 2".to_string()),
8928                offset: None,
8929                version: None,
8930                ..Default::default()
8931            };
8932
8933            let bytes = namespace.query_table(request).await.unwrap();
8934
8935            let cursor = Cursor::new(bytes.to_vec());
8936            let reader = FileReader::try_new(cursor, None).unwrap();
8937            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8938            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8939            assert!(total_rows <= 2);
8940        }
8941
8942        #[tokio::test]
8943        async fn test_query_table_vector_search_with_nprobes_and_refine() {
8944            let (namespace, _temp_dir, table_id) = create_ns_with_vector_table().await;
8945
8946            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8947                single_vector: Some(vec![0.0, 1.0, 0.0, 0.0]),
8948                multi_vector: None,
8949            });
8950
8951            let request = QueryTableRequest {
8952                id: Some(table_id),
8953                k: 2,
8954                vector,
8955                filter: None,
8956                offset: None,
8957                version: None,
8958                nprobes: Some(1),
8959                refine_factor: Some(1),
8960                prefilter: Some(true),
8961                ..Default::default()
8962            };
8963
8964            let bytes = namespace.query_table(request).await.unwrap();
8965
8966            let cursor = Cursor::new(bytes.to_vec());
8967            let reader = FileReader::try_new(cursor, None).unwrap();
8968            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
8969            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
8970            assert_eq!(total_rows, 2);
8971        }
8972
8973        #[tokio::test]
8974        async fn test_namespace_id() {
8975            let (namespace, _temp_dir) = create_test_namespace().await;
8976            let id = namespace.namespace_id();
8977            assert!(id.contains("DirectoryNamespace"));
8978            assert!(id.contains("root"));
8979        }
8980
8981        #[tokio::test]
8982        async fn test_query_table_empty_table() {
8983            let (namespace, _temp_dir) = create_test_namespace().await;
8984
8985            // Create table with empty IPC data (schema only, no rows)
8986            let schema = create_test_schema();
8987            let ipc_data = create_test_ipc_data(&schema);
8988            let mut create_request = CreateTableRequest::new();
8989            create_request.id = Some(vec!["empty_table".to_string()]);
8990            namespace
8991                .create_table(create_request, bytes::Bytes::from(ipc_data))
8992                .await
8993                .unwrap();
8994
8995            // Query the empty table — should hit the "no batches" else branch
8996            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
8997                single_vector: None,
8998                multi_vector: None,
8999            });
9000            let request = QueryTableRequest {
9001                id: Some(vec!["empty_table".to_string()]),
9002                k: 10,
9003                vector,
9004                ..Default::default()
9005            };
9006            let bytes = namespace.query_table(request).await.unwrap();
9007
9008            let cursor = Cursor::new(bytes.to_vec());
9009            let reader = FileReader::try_new(cursor, None).unwrap();
9010            let batches: Vec<_> = reader.collect::<std::result::Result<Vec<_>, _>>().unwrap();
9011            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9012            assert_eq!(total_rows, 0, "empty table should yield no rows");
9013        }
9014
9015        #[tokio::test]
9016        async fn test_query_table_with_plain_filter_no_vector() {
9017            let (namespace, _temp_dir, table_id) = create_ns_with_table().await;
9018
9019            // Query with filter but no vector (plain scan path + filter)
9020            let vector = Box::new(lance_namespace::models::QueryTableRequestVector {
9021                single_vector: None,
9022                multi_vector: None,
9023            });
9024            let request = QueryTableRequest {
9025                id: Some(table_id),
9026                k: 0,
9027                vector,
9028                filter: Some("id > 1".to_string()),
9029                ..Default::default()
9030            };
9031            let bytes = namespace.query_table(request).await.unwrap();
9032
9033            let cursor = Cursor::new(bytes.to_vec());
9034            let reader = FileReader::try_new(cursor, None).unwrap();
9035            let batches: Vec<_> = reader.into_iter().map(|b| b.unwrap()).collect();
9036            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
9037            assert!(total_rows > 0);
9038            assert!(total_rows < 3);
9039        }
9040    }
9041
9042    /// Tests for multi-table transaction support via table_version_storage_enabled.
9043    mod multi_table_transactions {
9044        use super::*;
9045        use futures::TryStreamExt;
9046        use lance::dataset::builder::DatasetBuilder;
9047        use lance_namespace::models::CreateTableVersionRequest;
9048
9049        /// Helper to create a namespace with table_version_storage_enabled enabled
9050        async fn create_managed_namespace(temp_path: &str) -> Arc<DirectoryNamespace> {
9051            Arc::new(
9052                DirectoryNamespaceBuilder::new(temp_path)
9053                    .table_version_tracking_enabled(true)
9054                    .table_version_storage_enabled(true)
9055                    .manifest_enabled(true)
9056                    .build()
9057                    .await
9058                    .unwrap(),
9059            )
9060        }
9061
9062        /// Helper to create a table and get its staging manifest path
9063        async fn create_table_and_get_staging(
9064            namespace: Arc<dyn LanceNamespace>,
9065            table_name: &str,
9066        ) -> (Vec<String>, object_store::path::Path) {
9067            let schema = create_test_schema();
9068            let ipc_data = create_test_ipc_data(&schema);
9069            let mut create_req = CreateTableRequest::new();
9070            create_req.id = Some(vec![table_name.to_string()]);
9071            namespace
9072                .create_table(create_req, bytes::Bytes::from(ipc_data))
9073                .await
9074                .unwrap();
9075
9076            let table_id = vec![table_name.to_string()];
9077            let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone())
9078                .await
9079                .unwrap()
9080                .load()
9081                .await
9082                .unwrap();
9083
9084            // Find existing manifest and create a staging copy
9085            let versions_path = dataset.versions_dir();
9086            let manifest_metas: Vec<_> = dataset
9087                .object_store()
9088                .inner
9089                .list(Some(&versions_path))
9090                .try_collect()
9091                .await
9092                .unwrap();
9093
9094            let manifest_meta = manifest_metas
9095                .iter()
9096                .find(|m| {
9097                    m.location
9098                        .filename()
9099                        .map(|f| f.ends_with(".manifest"))
9100                        .unwrap_or(false)
9101                })
9102                .expect("No manifest file found");
9103
9104            let manifest_data = dataset
9105                .object_store()
9106                .inner
9107                .get(&manifest_meta.location)
9108                .await
9109                .unwrap()
9110                .bytes()
9111                .await
9112                .unwrap();
9113
9114            let staging_path = dataset
9115                .versions_dir()
9116                .child(format!("staging_{}", table_name));
9117            dataset
9118                .object_store()
9119                .inner
9120                .put(&staging_path, manifest_data.into())
9121                .await
9122                .unwrap();
9123
9124            (table_id, staging_path)
9125        }
9126
9127        #[tokio::test]
9128        async fn test_table_version_storage_enabled_requires_manifest() {
9129            // table_version_storage_enabled=true requires manifest_enabled=true
9130            let temp_dir = TempStdDir::default();
9131            let temp_path = temp_dir.to_str().unwrap();
9132
9133            let result = DirectoryNamespaceBuilder::new(temp_path)
9134                .table_version_storage_enabled(true)
9135                .manifest_enabled(false)
9136                .build()
9137                .await;
9138
9139            assert!(
9140                result.is_err(),
9141                "Should fail when table_version_storage_enabled=true but manifest_enabled=false"
9142            );
9143        }
9144
9145        #[tokio::test]
9146        async fn test_create_table_version_records_in_manifest() {
9147            // When table_version_storage_enabled is enabled, single create_table_version
9148            // should also record the version in __manifest
9149            let temp_dir = TempStrDir::default();
9150            let temp_path: &str = &temp_dir;
9151
9152            let namespace = create_managed_namespace(temp_path).await;
9153            let ns: Arc<dyn LanceNamespace> = namespace.clone();
9154
9155            let (table_id, staging_path) =
9156                create_table_and_get_staging(ns.clone(), "table_managed").await;
9157
9158            // Create version 2
9159            let mut create_req = CreateTableVersionRequest::new(2, staging_path.to_string());
9160            create_req.id = Some(table_id.clone());
9161            create_req.naming_scheme = Some("V2".to_string());
9162            let response = namespace.create_table_version(create_req).await.unwrap();
9163
9164            assert!(response.version.is_some());
9165            let version = response.version.unwrap();
9166            assert_eq!(version.version, 2);
9167
9168            // Verify the version is recorded in __manifest by querying it
9169            let manifest_ns = namespace.manifest_ns.as_ref().unwrap();
9170            let table_id_str = manifest::ManifestNamespace::str_object_id(&table_id);
9171            let versions = manifest_ns
9172                .query_table_versions(&table_id_str, false, None)
9173                .await
9174                .unwrap();
9175
9176            assert!(
9177                !versions.is_empty(),
9178                "Version should be recorded in __manifest"
9179            );
9180            let (ver, _path) = &versions[0];
9181            assert_eq!(*ver, 2, "Recorded version should be 2");
9182        }
9183    }
9184
9185    #[tokio::test]
9186    async fn test_list_all_tables() {
9187        use lance_namespace::models::ListTablesRequest;
9188
9189        let (namespace, _temp_dir) = create_test_namespace().await;
9190        create_scalar_table(&namespace, "alpha").await;
9191        create_scalar_table(&namespace, "beta").await;
9192
9193        let request = ListTablesRequest {
9194            id: Some(vec![]),
9195            page_token: None,
9196            limit: None,
9197            ..Default::default()
9198        };
9199        let response = namespace.list_all_tables(request).await.unwrap();
9200        let mut tables = response.tables;
9201        tables.sort();
9202        assert_eq!(tables, vec!["alpha", "beta"]);
9203    }
9204
9205    #[tokio::test]
9206    async fn test_restore_table() {
9207        use lance_namespace::models::RestoreTableRequest;
9208
9209        let (namespace, _temp_dir) = create_test_namespace().await;
9210        create_scalar_table(&namespace, "users").await;
9211
9212        // Create a second version by creating a scalar index (this adds a new version)
9213        create_scalar_index(&namespace, "users", "users_id_idx").await;
9214
9215        let dataset = open_dataset(&namespace, "users").await;
9216        let current_version = dataset.version().version;
9217        assert!(current_version >= 2, "Should have at least 2 versions");
9218
9219        // Restore to version 1
9220        let mut restore_req = RestoreTableRequest::new(1);
9221        restore_req.id = Some(vec!["users".to_string()]);
9222        let response = namespace.restore_table(restore_req).await.unwrap();
9223
9224        // transaction_id should be present (the restore operation)
9225        assert!(
9226            response.transaction_id.is_some(),
9227            "restore_table should return a transaction_id"
9228        );
9229
9230        // Verify the dataset now has a new version (restore creates a new version)
9231        let dataset_after = open_dataset(&namespace, "users").await;
9232        assert!(
9233            dataset_after.version().version > current_version,
9234            "Restore should create a new version"
9235        );
9236    }
9237
9238    #[tokio::test]
9239    async fn test_update_table_schema_metadata() {
9240        use lance_namespace::models::UpdateTableSchemaMetadataRequest;
9241
9242        let (namespace, _temp_dir) = create_test_namespace().await;
9243        create_scalar_table(&namespace, "products").await;
9244
9245        let mut metadata = HashMap::new();
9246        metadata.insert("owner".to_string(), "team_a".to_string());
9247        metadata.insert("version".to_string(), "1.0".to_string());
9248
9249        let mut req = UpdateTableSchemaMetadataRequest::new();
9250        req.id = Some(vec!["products".to_string()]);
9251        req.metadata = Some(metadata.clone());
9252
9253        let response = namespace.update_table_schema_metadata(req).await.unwrap();
9254
9255        assert!(response.metadata.is_some());
9256        let returned = response.metadata.unwrap();
9257        assert_eq!(returned.get("owner"), Some(&"team_a".to_string()));
9258        assert_eq!(returned.get("version"), Some(&"1.0".to_string()));
9259        assert!(
9260            response.transaction_id.is_some(),
9261            "update_table_schema_metadata should return a transaction_id"
9262        );
9263    }
9264
9265    #[tokio::test]
9266    async fn test_get_table_stats() {
9267        use lance_namespace::models::GetTableStatsRequest;
9268
9269        let (namespace, _temp_dir) = create_test_namespace().await;
9270        create_scalar_table(&namespace, "items").await;
9271        create_scalar_index(&namespace, "items", "items_id_idx").await;
9272
9273        let mut req = GetTableStatsRequest::new();
9274        req.id = Some(vec!["items".to_string()]);
9275
9276        let response = namespace.get_table_stats(req).await.unwrap();
9277        assert_eq!(response.num_rows, 3);
9278        assert_eq!(response.num_indices, 1);
9279    }
9280
9281    #[tokio::test]
9282    async fn test_explain_table_query_plan() {
9283        use lance_namespace::models::QueryTableRequestVector;
9284        use lance_namespace::models::{ExplainTableQueryPlanRequest, QueryTableRequest};
9285
9286        let (namespace, _temp_dir) = create_test_namespace().await;
9287        create_scalar_table(&namespace, "catalog").await;
9288
9289        let mut query = QueryTableRequest::new(1, QueryTableRequestVector::new());
9290        query.filter = Some("id > 1".to_string());
9291        query.columns = Some(Box::new(QueryTableRequestColumns {
9292            column_names: Some(vec!["id".to_string(), "name".to_string()]),
9293            column_aliases: None,
9294        }));
9295        query.with_row_id = Some(true);
9296
9297        let mut req = ExplainTableQueryPlanRequest::new(query);
9298        req.id = Some(vec!["catalog".to_string()]);
9299
9300        let plan_str = namespace.explain_table_query_plan(req).await.unwrap();
9301        assert_plan_contains_all(
9302            &plan_str,
9303            &[
9304                "ProjectionExec: expr=[id@0 as id, name@2 as name",
9305                "Take: columns=\"id, _rowid, (name)\"",
9306                "LanceRead: uri=",
9307                "projection=[id]",
9308                "row_id=true, row_addr=false",
9309                "full_filter=id > Int32(1)",
9310                "refine_filter=id > Int32(1)",
9311            ],
9312            "Filtered explain plan should preserve late materialization and filter pushdown",
9313        );
9314    }
9315
9316    #[tokio::test]
9317    async fn test_analyze_table_query_plan() {
9318        use lance_namespace::models::AnalyzeTableQueryPlanRequest;
9319        use lance_namespace::models::QueryTableRequestVector;
9320
9321        let (namespace, _temp_dir) = create_test_namespace().await;
9322        create_scalar_table(&namespace, "catalog").await;
9323
9324        let mut req = AnalyzeTableQueryPlanRequest::new(1, QueryTableRequestVector::new());
9325        req.id = Some(vec!["catalog".to_string()]);
9326        req.filter = Some("id > 0".to_string());
9327        req.columns = Some(Box::new(QueryTableRequestColumns {
9328            column_names: Some(vec!["id".to_string(), "name".to_string()]),
9329            column_aliases: None,
9330        }));
9331        req.with_row_id = Some(true);
9332
9333        let analysis_str = namespace.analyze_table_query_plan(req).await.unwrap();
9334        assert_plan_contains_all(
9335            &analysis_str,
9336            &[
9337                "AnalyzeExec verbose=true",
9338                "ProjectionExec: elapsed=",
9339                "expr=[id@0 as id, name@2 as name",
9340                "Take: elapsed=",
9341                "columns=\"id, _rowid, (name)\"",
9342                "CoalesceBatchesExec: elapsed=",
9343                "LanceRead: elapsed=",
9344                "projection=[id]",
9345                "row_id=true, row_addr=false",
9346                "full_filter=id > Int32(0)",
9347                "refine_filter=id > Int32(0)",
9348                "metrics=[output_rows=",
9349            ],
9350            "Filtered analyze plan should preserve late materialization and filter pushdown",
9351        );
9352    }
9353
9354    #[tokio::test]
9355    async fn test_dir_listing_no_extra_calls_without_migration() {
9356        let temp_dir = TempStdDir::default();
9357        let temp_path = temp_dir.to_str().unwrap();
9358        let root_uri = file_object_store_uri(temp_path);
9359        let listing_count = Arc::new(AtomicUsize::new(0));
9360        let session = build_listing_counting_session(listing_count.clone());
9361
9362        // Create a table using dir-listing-only namespace
9363        let dir_only_ns = DirectoryNamespaceBuilder::new(root_uri.clone())
9364            .session(session.clone())
9365            .manifest_enabled(false)
9366            .dir_listing_enabled(true)
9367            .build()
9368            .await
9369            .unwrap();
9370
9371        let schema = create_test_schema();
9372        let ipc_data = create_test_ipc_data(&schema);
9373        let mut create_req = CreateTableRequest::new();
9374        create_req.id = Some(vec!["test_table".to_string()]);
9375        dir_only_ns
9376            .create_table(create_req, Bytes::from(ipc_data))
9377            .await
9378            .unwrap();
9379
9380        // Build a namespace with both enabled but migration disabled (default)
9381        let hybrid_ns = DirectoryNamespaceBuilder::new(root_uri)
9382            .session(session)
9383            .manifest_enabled(true)
9384            .dir_listing_enabled(true)
9385            .dir_listing_to_manifest_migration_enabled(false)
9386            .build()
9387            .await
9388            .unwrap();
9389
9390        // Reset counter before the operation we want to measure
9391        listing_count.store(0, Ordering::SeqCst);
9392
9393        // table_exists should use dir listing directly, making only 1 listing call
9394        let mut exists_req = TableExistsRequest::new();
9395        exists_req.id = Some(vec!["test_table".to_string()]);
9396        hybrid_ns.table_exists(exists_req).await.unwrap();
9397
9398        let count = listing_count.load(Ordering::SeqCst);
9399        assert_eq!(
9400            count, 1,
9401            "Expected exactly 1 listing call for table_exists \
9402             without migration mode, but got {}",
9403            count
9404        );
9405
9406        // Reset and test describe_table
9407        listing_count.store(0, Ordering::SeqCst);
9408
9409        let mut describe_req = DescribeTableRequest::new();
9410        describe_req.id = Some(vec!["test_table".to_string()]);
9411        hybrid_ns.describe_table(describe_req).await.unwrap();
9412
9413        let count = listing_count.load(Ordering::SeqCst);
9414        assert_eq!(
9415            count, 1,
9416            "Expected exactly 1 listing call for describe_table \
9417             without migration mode, but got {}",
9418            count
9419        );
9420    }
9421
9422    #[tokio::test]
9423    async fn test_describe_declared_table_checks_versions_only_when_requested() {
9424        let temp_dir = TempStdDir::default();
9425        let temp_path = temp_dir.to_str().unwrap();
9426        let root_uri = file_object_store_uri(temp_path);
9427        let listing_count = Arc::new(AtomicUsize::new(0));
9428        let session = build_listing_counting_session(listing_count.clone());
9429
9430        let namespace = DirectoryNamespaceBuilder::new(root_uri)
9431            .session(session)
9432            .manifest_enabled(false)
9433            .dir_listing_enabled(true)
9434            .build()
9435            .await
9436            .unwrap();
9437
9438        let mut declare_req = DeclareTableRequest::new();
9439        declare_req.id = Some(vec!["test_table".to_string()]);
9440        namespace.declare_table(declare_req).await.unwrap();
9441
9442        listing_count.store(0, Ordering::SeqCst);
9443
9444        let mut describe_req = DescribeTableRequest::new();
9445        describe_req.id = Some(vec!["test_table".to_string()]);
9446        let describe_response = namespace.describe_table(describe_req).await.unwrap();
9447
9448        assert_eq!(describe_response.is_only_declared, None);
9449        assert_eq!(
9450            listing_count.load(Ordering::SeqCst),
9451            1,
9452            "Default describe_table should only list the table directory"
9453        );
9454
9455        listing_count.store(0, Ordering::SeqCst);
9456
9457        let mut describe_req = DescribeTableRequest::new();
9458        describe_req.id = Some(vec!["test_table".to_string()]);
9459        describe_req.check_declared = Some(true);
9460        let describe_response = namespace.describe_table(describe_req).await.unwrap();
9461
9462        assert_eq!(describe_response.is_only_declared, Some(true));
9463        assert_eq!(
9464            listing_count.load(Ordering::SeqCst),
9465            2,
9466            "check_declared describe_table should list the table directory and _versions"
9467        );
9468    }
9469
9470    #[tokio::test]
9471    async fn test_dir_listing_extra_calls_with_migration() {
9472        let temp_dir = TempStdDir::default();
9473        let temp_path = temp_dir.to_str().unwrap();
9474        let root_uri = file_object_store_uri(temp_path);
9475        let listing_count = Arc::new(AtomicUsize::new(0));
9476        let session = build_listing_counting_session(listing_count.clone());
9477
9478        // Create a table using dir-listing-only namespace so it exists physically but is absent from __manifest.
9479        let dir_only_ns = DirectoryNamespaceBuilder::new(root_uri.clone())
9480            .session(session.clone())
9481            .manifest_enabled(false)
9482            .dir_listing_enabled(true)
9483            .build()
9484            .await
9485            .unwrap();
9486
9487        let schema = create_test_schema();
9488        let ipc_data = create_test_ipc_data(&schema);
9489        let mut create_req = CreateTableRequest::new();
9490        create_req.id = Some(vec!["test_table".to_string()]);
9491        dir_only_ns
9492            .create_table(create_req, Bytes::from(ipc_data))
9493            .await
9494            .unwrap();
9495
9496        let hybrid_ns = DirectoryNamespaceBuilder::new(root_uri)
9497            .session(session)
9498            .manifest_enabled(true)
9499            .dir_listing_enabled(true)
9500            .dir_listing_to_manifest_migration_enabled(true)
9501            .build()
9502            .await
9503            .unwrap();
9504
9505        // table_exists first checks __manifest (one list on __manifest/_versions),
9506        // then falls back to the table directory (one list_with_delimiter on test_table.lance).
9507        listing_count.store(0, Ordering::SeqCst);
9508
9509        let mut exists_req = TableExistsRequest::new();
9510        exists_req.id = Some(vec!["test_table".to_string()]);
9511        hybrid_ns.table_exists(exists_req).await.unwrap();
9512
9513        let count = listing_count.load(Ordering::SeqCst);
9514        assert_eq!(
9515            count, 2,
9516            "Expected exactly 2 listing calls for table_exists with migration mode \
9517             (manifest reload + table directory fallback), but got {}",
9518            count
9519        );
9520
9521        // describe_table follows the same path when the table is not yet registered in __manifest.
9522        listing_count.store(0, Ordering::SeqCst);
9523
9524        let mut describe_req = DescribeTableRequest::new();
9525        describe_req.id = Some(vec!["test_table".to_string()]);
9526        hybrid_ns.describe_table(describe_req).await.unwrap();
9527
9528        let count = listing_count.load(Ordering::SeqCst);
9529        assert_eq!(
9530            count, 2,
9531            "Expected exactly 2 listing calls for describe_table with migration mode \
9532             (manifest reload + table directory fallback), but got {}",
9533            count
9534        );
9535    }
9536
9537    #[tokio::test]
9538    async fn test_migration_not_found_errors_include_table_id() {
9539        let temp_dir = TempStdDir::default();
9540        let temp_path = temp_dir.to_str().unwrap();
9541
9542        let namespace = DirectoryNamespaceBuilder::new(temp_path)
9543            .manifest_enabled(true)
9544            .dir_listing_enabled(true)
9545            .dir_listing_to_manifest_migration_enabled(true)
9546            .build()
9547            .await
9548            .unwrap();
9549
9550        let mut exists_req = TableExistsRequest::new();
9551        exists_req.id = Some(vec!["missing_table".to_string()]);
9552        let err = namespace.table_exists(exists_req).await.unwrap_err();
9553        assert!(matches!(err, Error::Namespace { .. }));
9554        let err_msg = err.to_string();
9555        assert!(err_msg.contains("Table not found"));
9556        assert!(err_msg.contains("table id 'missing_table'"));
9557
9558        let mut describe_req = DescribeTableRequest::new();
9559        describe_req.id = Some(vec!["missing_table".to_string()]);
9560        let err = namespace.describe_table(describe_req).await.unwrap_err();
9561        assert!(matches!(err, Error::Namespace { .. }));
9562        let err_msg = err.to_string();
9563        assert!(err_msg.contains("Table not found"));
9564        assert!(err_msg.contains("table id 'missing_table'"));
9565    }
9566
9567    #[tokio::test]
9568    async fn test_manifest_not_found_errors_include_full_table_id() {
9569        use lance_namespace::models::CreateNamespaceRequest;
9570
9571        let temp_dir = TempStdDir::default();
9572        let temp_path = temp_dir.to_str().unwrap();
9573
9574        let namespace = DirectoryNamespaceBuilder::new(temp_path)
9575            .manifest_enabled(true)
9576            .dir_listing_enabled(true)
9577            .build()
9578            .await
9579            .unwrap();
9580
9581        let mut create_ns_req = CreateNamespaceRequest::new();
9582        create_ns_req.id = Some(vec!["workspace".to_string()]);
9583        namespace.create_namespace(create_ns_req).await.unwrap();
9584
9585        let missing_table_id = vec!["workspace".to_string(), "missing_table".to_string()];
9586
9587        let mut exists_req = TableExistsRequest::new();
9588        exists_req.id = Some(missing_table_id.clone());
9589        let err = namespace.table_exists(exists_req).await.unwrap_err();
9590        assert!(matches!(err, Error::Namespace { .. }));
9591        let err_msg = err.to_string();
9592        assert!(err_msg.contains("Table not found"));
9593        assert!(err_msg.contains("table id 'workspace$missing_table'"));
9594
9595        let mut describe_req = DescribeTableRequest::new();
9596        describe_req.id = Some(missing_table_id);
9597        let err = namespace.describe_table(describe_req).await.unwrap_err();
9598        assert!(matches!(err, Error::Namespace { .. }));
9599        let err_msg = err.to_string();
9600        assert!(err_msg.contains("Table not found"));
9601        assert!(err_msg.contains("table id 'workspace$missing_table'"));
9602    }
9603}