lance_io/object_store/
storage_options.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Storage options provider for dynamic credential fetching
5//!
6//! This module provides a trait for fetching storage options from various sources
7//! (namespace servers, secret managers, etc.) with support for expiration tracking
8//! and automatic refresh.
9
10use std::collections::HashMap;
11use std::fmt;
12use std::sync::Arc;
13
14use crate::{Error, Result};
15use async_trait::async_trait;
16use lance_namespace::models::DescribeTableRequest;
17use lance_namespace::LanceNamespace;
18use snafu::location;
19
/// Name of the storage-options entry that carries the expiration timestamp.
///
/// When a storage options `HashMap` contains this key, its value is expected to be the
/// epoch time, in milliseconds, at which the options expire.
pub const EXPIRES_AT_MILLIS_KEY: &str = "expires_at_millis";
22
23/// Trait for providing storage options with expiration tracking
24///
25/// Implementations can fetch storage options from various sources (namespace servers,
26/// secret managers, etc.) and are usable from Python/Java.
27///
28/// # Current Use Cases
29///
30/// - **Temporary Credentials**: Fetch short-lived AWS temporary credentials that expire
31///   after a set time period, with automatic refresh before expiration
32///
33/// # Future Possible Use Cases
34///
35/// - **Dynamic Storage Location Resolution**: Resolve logical names to actual storage
36///   locations (bucket aliases, S3 Access Points, region-specific endpoints) that may
37///   change based on region, tier, data migration, or failover scenarios
38/// - **Runtime S3 Tags Assignment**: Inject cost allocation tags, security labels, or
39///   compliance metadata into S3 requests based on the current execution context (user,
40///   application, workspace, etc.)
41/// - **Dynamic Endpoint Configuration**: Update storage endpoints for disaster recovery,
42///   A/B testing, or gradual migration scenarios
43/// - **Just-in-time Permission Elevation**: Request elevated permissions only when needed
44///   for sensitive operations, then immediately revoke them
45/// - **Secret Manager Integration**: Fetch encryption keys from AWS Secrets Manager,
46///   Azure Key Vault, or Google Secret Manager with automatic rotation
47/// - **OIDC/SAML Federation**: Integrate with identity providers to obtain storage
48///   credentials based on user identity and group membership
49///
50/// # Equality and Hashing
51///
52/// Implementations must provide `provider_id()` which returns a unique identifier for
53/// equality and hashing purposes. Two providers with the same ID are considered equal
54/// and will share the same cached ObjectStore in the registry.
55#[async_trait]
56pub trait StorageOptionsProvider: Send + Sync + fmt::Debug {
57    /// Fetch fresh storage options
58    ///
59    /// Returns None if no storage options are available, or Some(HashMap) with the options.
60    /// If the [`EXPIRES_AT_MILLIS_KEY`] key is present in the HashMap, it should contain the
61    /// epoch time in milliseconds when the options expire, and credentials will automatically
62    /// refresh before expiration.
63    /// If [`EXPIRES_AT_MILLIS_KEY`] is not provided, the options are considered to never expire.
64    async fn fetch_storage_options(&self) -> Result<Option<HashMap<String, String>>>;
65
66    /// Return a human-readable unique identifier for this provider instance
67    ///
68    /// This is used for equality comparison and hashing in the object store registry.
69    /// Two providers with the same ID will be treated as equal and share the same cached
70    /// ObjectStore.
71    ///
72    /// The ID should be human-readable for debugging and logging purposes.
73    /// For example: `"namespace[dir(root=/data)],table[db$schema$table1]"`
74    ///
75    /// The ID should uniquely identify the provider's configuration.
76    fn provider_id(&self) -> String;
77}
78
79/// StorageOptionsProvider implementation that fetches options from a LanceNamespace
80pub struct LanceNamespaceStorageOptionsProvider {
81    namespace: Arc<dyn LanceNamespace>,
82    table_id: Vec<String>,
83}
84
85impl fmt::Debug for LanceNamespaceStorageOptionsProvider {
86    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
87        write!(f, "{}", self.provider_id())
88    }
89}
90
91impl fmt::Display for LanceNamespaceStorageOptionsProvider {
92    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93        write!(f, "{}", self.provider_id())
94    }
95}
96
97impl LanceNamespaceStorageOptionsProvider {
98    /// Create a new LanceNamespaceStorageOptionsProvider
99    ///
100    /// # Arguments
101    /// * `namespace` - The namespace implementation to fetch storage options from
102    /// * `table_id` - The table identifier
103    pub fn new(namespace: Arc<dyn LanceNamespace>, table_id: Vec<String>) -> Self {
104        Self {
105            namespace,
106            table_id,
107        }
108    }
109}
110
111#[async_trait]
112impl StorageOptionsProvider for LanceNamespaceStorageOptionsProvider {
113    async fn fetch_storage_options(&self) -> Result<Option<HashMap<String, String>>> {
114        let request = DescribeTableRequest {
115            id: Some(self.table_id.clone()),
116            version: None,
117        };
118
119        let response = self
120            .namespace
121            .describe_table(request)
122            .await
123            .map_err(|e| Error::IO {
124                source: Box::new(std::io::Error::other(format!(
125                    "Failed to fetch storage options: {}",
126                    e
127                ))),
128                location: location!(),
129            })?;
130
131        Ok(response.storage_options)
132    }
133
134    fn provider_id(&self) -> String {
135        format!(
136            "LanceNamespaceStorageOptionsProvider {{ namespace: {}, table_id: {:?} }}",
137            self.namespace.namespace_id(),
138            self.table_id
139        )
140    }
141}