lance_io/object_store/storage_options.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Storage options provider for dynamic credential fetching
5//!
6//! This module provides a trait for fetching storage options from various sources
7//! (namespace servers, secret managers, etc.) with support for expiration tracking
8//! and automatic refresh.
9
10use std::collections::HashMap;
11use std::fmt;
12use std::sync::Arc;
13
14use crate::{Error, Result};
15use async_trait::async_trait;
16use lance_namespace::models::DescribeTableRequest;
17use lance_namespace::LanceNamespace;
18use snafu::location;
19
/// Name of the storage-options entry that carries the expiration timestamp.
///
/// When a storage options `HashMap` contains this key, its value is the epoch
/// time, in milliseconds, at which the options expire.
pub const EXPIRES_AT_MILLIS_KEY: &str = "expires_at_millis";
22
23/// Trait for providing storage options with expiration tracking
24///
25/// Implementations can fetch storage options from various sources (namespace servers,
26/// secret managers, etc.) and are usable from Python/Java.
27///
28/// # Current Use Cases
29///
30/// - **Temporary Credentials**: Fetch short-lived AWS temporary credentials that expire
31/// after a set time period, with automatic refresh before expiration
32///
33/// # Future Possible Use Cases
34///
35/// - **Dynamic Storage Location Resolution**: Resolve logical names to actual storage
36/// locations (bucket aliases, S3 Access Points, region-specific endpoints) that may
37/// change based on region, tier, data migration, or failover scenarios
38/// - **Runtime S3 Tags Assignment**: Inject cost allocation tags, security labels, or
39/// compliance metadata into S3 requests based on the current execution context (user,
40/// application, workspace, etc.)
41/// - **Dynamic Endpoint Configuration**: Update storage endpoints for disaster recovery,
42/// A/B testing, or gradual migration scenarios
43/// - **Just-in-time Permission Elevation**: Request elevated permissions only when needed
44/// for sensitive operations, then immediately revoke them
45/// - **Secret Manager Integration**: Fetch encryption keys from AWS Secrets Manager,
46/// Azure Key Vault, or Google Secret Manager with automatic rotation
47/// - **OIDC/SAML Federation**: Integrate with identity providers to obtain storage
48/// credentials based on user identity and group membership
49///
50/// # Equality and Hashing
51///
52/// Implementations must provide `provider_id()` which returns a unique identifier for
53/// equality and hashing purposes. Two providers with the same ID are considered equal
54/// and will share the same cached ObjectStore in the registry.
55#[async_trait]
56pub trait StorageOptionsProvider: Send + Sync + fmt::Debug {
57 /// Fetch fresh storage options
58 ///
59 /// Returns None if no storage options are available, or Some(HashMap) with the options.
60 /// If the [`EXPIRES_AT_MILLIS_KEY`] key is present in the HashMap, it should contain the
61 /// epoch time in milliseconds when the options expire, and credentials will automatically
62 /// refresh before expiration.
63 /// If [`EXPIRES_AT_MILLIS_KEY`] is not provided, the options are considered to never expire.
64 async fn fetch_storage_options(&self) -> Result<Option<HashMap<String, String>>>;
65
66 /// Return a human-readable unique identifier for this provider instance
67 ///
68 /// This is used for equality comparison and hashing in the object store registry.
69 /// Two providers with the same ID will be treated as equal and share the same cached
70 /// ObjectStore.
71 ///
72 /// The ID should be human-readable for debugging and logging purposes.
73 /// For example: `"namespace[dir(root=/data)],table[db$schema$table1]"`
74 ///
75 /// The ID should uniquely identify the provider's configuration.
76 fn provider_id(&self) -> String;
77}
78
79/// StorageOptionsProvider implementation that fetches options from a LanceNamespace
80pub struct LanceNamespaceStorageOptionsProvider {
81 namespace: Arc<dyn LanceNamespace>,
82 table_id: Vec<String>,
83}
84
85impl fmt::Debug for LanceNamespaceStorageOptionsProvider {
86 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
87 write!(f, "{}", self.provider_id())
88 }
89}
90
91impl fmt::Display for LanceNamespaceStorageOptionsProvider {
92 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93 write!(f, "{}", self.provider_id())
94 }
95}
96
97impl LanceNamespaceStorageOptionsProvider {
98 /// Create a new LanceNamespaceStorageOptionsProvider
99 ///
100 /// # Arguments
101 /// * `namespace` - The namespace implementation to fetch storage options from
102 /// * `table_id` - The table identifier
103 pub fn new(namespace: Arc<dyn LanceNamespace>, table_id: Vec<String>) -> Self {
104 Self {
105 namespace,
106 table_id,
107 }
108 }
109}
110
111#[async_trait]
112impl StorageOptionsProvider for LanceNamespaceStorageOptionsProvider {
113 async fn fetch_storage_options(&self) -> Result<Option<HashMap<String, String>>> {
114 let request = DescribeTableRequest {
115 id: Some(self.table_id.clone()),
116 version: None,
117 };
118
119 let response = self
120 .namespace
121 .describe_table(request)
122 .await
123 .map_err(|e| Error::IO {
124 source: Box::new(std::io::Error::other(format!(
125 "Failed to fetch storage options: {}",
126 e
127 ))),
128 location: location!(),
129 })?;
130
131 Ok(response.storage_options)
132 }
133
134 fn provider_id(&self) -> String {
135 format!(
136 "LanceNamespaceStorageOptionsProvider {{ namespace: {}, table_id: {:?} }}",
137 self.namespace.namespace_id(),
138 self.table_id
139 )
140 }
141}