scim_server/storage/
mod.rs

1//! Storage abstraction layer for SCIM resources.
2//!
3//! This module provides a clean separation between storage concerns and SCIM protocol logic.
4//! The `StorageProvider` trait defines pure data storage operations that are protocol-agnostic,
5//! allowing for pluggable storage backends while keeping SCIM-specific logic in the provider layer.
6//!
7//! # Architecture
8//!
9//! The storage layer is responsible for:
10//! - Pure PUT/GET/DELETE operations on JSON data
11//! - Tenant isolation and data organization
12//! - Basic querying and filtering
13//! - Data persistence and retrieval
14//!
15//! The storage layer is NOT responsible for:
16//! - SCIM metadata generation (timestamps, versions, etc.)
17//! - SCIM validation rules
18//! - Business logic (limits, permissions, etc.)
19//! - Protocol-specific transformations
20//!
21//! # Design Philosophy
22//!
23//! This interface follows the principle that at the storage level, CREATE and UPDATE are
24//! the same operation - you're just putting data at a location. The distinction between
25//! "create" vs "update" is business logic that belongs in the SCIM provider layer.
26//!
27//! # Example Usage
28//!
29//! ```rust
30//! use scim_server::storage::{StorageProvider, StorageKey, InMemoryStorage};
31//! use serde_json::json;
32//!
33//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
34//! let storage = InMemoryStorage::new();
35//!
36//! // Put a resource (works for both create and update)
37//! let key = StorageKey::new("tenant1", "User", "123");
38//! let user_data = json!({
39//!     "id": "123",
40//!     "userName": "john.doe",
41//!     "displayName": "John Doe"
42//! });
43//! let stored_data = storage.put(key.clone(), user_data).await?;
44//!
45//! // Get the resource
46//! let retrieved = storage.get(key.clone()).await?;
47//! assert!(retrieved.is_some());
48//!
49//! // Delete the resource
50//! let was_deleted = storage.delete(key).await?;
51//! assert!(was_deleted);
52//! # Ok(())
53//! # }
54//! ```
55
56pub mod errors;
57pub mod in_memory;
58
59pub use errors::StorageError;
60pub use in_memory::{InMemoryStorage, InMemoryStorageStats};
61
62use serde_json::Value;
63use std::fmt;
64use std::future::Future;
65
/// Fully-qualified address of a single resource held in storage.
///
/// A key is the triple `tenant_id` → `resource_type` → `resource_id`. The tenant
/// segment comes first, so every key is inherently scoped to exactly one tenant.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct StorageKey {
    /// Tenant that owns the resource.
    tenant_id: String,
    /// Resource type segment of the key (for example `"User"`).
    resource_type: String,
    /// Identifier of the resource within its tenant and resource type.
    resource_id: String,
}
76
77impl StorageKey {
78    /// Create a new storage key.
79    pub fn new(
80        tenant_id: impl Into<String>,
81        resource_type: impl Into<String>,
82        resource_id: impl Into<String>,
83    ) -> Self {
84        Self {
85            tenant_id: tenant_id.into(),
86            resource_type: resource_type.into(),
87            resource_id: resource_id.into(),
88        }
89    }
90
91    /// Get the tenant ID.
92    pub fn tenant_id(&self) -> &str {
93        &self.tenant_id
94    }
95
96    /// Get the resource type.
97    pub fn resource_type(&self) -> &str {
98        &self.resource_type
99    }
100
101    /// Get the resource ID.
102    pub fn resource_id(&self) -> &str {
103        &self.resource_id
104    }
105
106    /// Create a prefix key for listing resources of a type within a tenant.
107    pub fn prefix(tenant_id: impl Into<String>, resource_type: impl Into<String>) -> StoragePrefix {
108        StoragePrefix {
109            tenant_id: tenant_id.into(),
110            resource_type: resource_type.into(),
111        }
112    }
113}
114
115impl fmt::Display for StorageKey {
116    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
117        write!(
118            f,
119            "{}/{}/{}",
120            self.tenant_id, self.resource_type, self.resource_id
121        )
122    }
123}
124
/// A prefix for querying resources by tenant and type.
///
/// A prefix is the first two segments of a [`StorageKey`]: `tenant_id` →
/// `resource_type`. It addresses every resource of one type within one tenant.
///
/// `Hash` is derived alongside `Eq` (matching [`StorageKey`]) so that a prefix
/// can be used directly as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct StoragePrefix {
    /// Tenant whose resources are addressed.
    tenant_id: String,
    /// Resource type addressed within the tenant (for example `"User"`).
    resource_type: String,
}
131
132impl StoragePrefix {
133    /// Get the tenant ID.
134    pub fn tenant_id(&self) -> &str {
135        &self.tenant_id
136    }
137
138    /// Get the resource type.
139    pub fn resource_type(&self) -> &str {
140        &self.resource_type
141    }
142}
143
144impl fmt::Display for StoragePrefix {
145    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146        write!(f, "{}/{}", self.tenant_id, self.resource_type)
147    }
148}
149
150/// Core trait for storage providers that handle pure data persistence operations.
151///
152/// This trait defines a protocol-agnostic interface for storing and retrieving JSON data
153/// with tenant isolation. Implementations should focus solely on data persistence and
154/// retrieval without any SCIM-specific logic.
155///
156/// # Design Principles
157///
158/// - **PUT/GET/DELETE Model**: Simple, fundamental operations
159/// - **PUT Returns Data**: Supports SCIM requirement to return resource state after operations
160/// - **DELETE Returns Boolean**: Indicates whether resource existed (for proper HTTP status codes)
161/// - **Tenant Isolation**: All operations are scoped to a specific tenant via StorageKey
162/// - **Protocol Agnostic**: No awareness of SCIM structures or semantics
163/// - **Async First**: All operations return futures for scalability
164/// - **Error Transparency**: Storage errors are clearly separated from protocol errors
165///
166/// # Key Design Decisions
167///
168/// - **No separate CREATE/UPDATE**: Both are just PUT operations. Business logic determines
169///   whether this should be treated as create vs update.
170/// - **PUT returns stored data**: This enables SCIM providers to return the complete resource
171///   state after modifications without a separate GET call.
172/// - **DELETE returns boolean**: Allows proper HTTP status code handling (204 vs 404).
173pub trait StorageProvider: Send + Sync {
174    /// The error type returned by storage operations.
175    type Error: std::error::Error + Send + Sync + 'static;
176
177    /// Store data at the specified key and return the stored data.
178    ///
179    /// # Arguments
180    /// * `key` - The storage key identifying the resource location
181    /// * `data` - The JSON data to store
182    ///
183    /// # Returns
184    /// The data that was actually stored (may include storage-level metadata).
185    ///
186    /// # Behavior
187    /// - If a resource with the same key already exists, it is completely replaced
188    /// - The storage implementation should ensure atomic operations where possible
189    /// - No validation is performed on the data structure
190    /// - The returned data should be exactly what would be retrieved by `get()`
191    fn put(
192        &self,
193        key: StorageKey,
194        data: Value,
195    ) -> impl Future<Output = Result<Value, Self::Error>> + Send;
196
197    /// Retrieve data by key.
198    ///
199    /// # Arguments
200    /// * `key` - The storage key identifying the resource
201    ///
202    /// # Returns
203    /// `Some(data)` if the resource exists, `None` if it doesn't exist.
204    fn get(
205        &self,
206        key: StorageKey,
207    ) -> impl Future<Output = Result<Option<Value>, Self::Error>> + Send;
208
209    /// Delete data by key.
210    ///
211    /// # Arguments
212    /// * `key` - The storage key identifying the resource
213    ///
214    /// # Returns
215    /// `true` if the resource was deleted, `false` if it didn't exist.
216    ///
217    /// # Note
218    /// This follows SCIM/HTTP semantics where DELETE operations don't return resource data.
219    /// The boolean return value allows proper HTTP status code selection (204 vs 404).
220    fn delete(&self, key: StorageKey) -> impl Future<Output = Result<bool, Self::Error>> + Send;
221
222    /// List resources matching a prefix with pagination.
223    ///
224    /// # Arguments
225    /// * `prefix` - The storage prefix (tenant + resource type)
226    /// * `offset` - The number of resources to skip (0-based)
227    /// * `limit` - The maximum number of resources to return
228    ///
229    /// # Returns
230    /// A vector of (key, data) pairs.
231    ///
232    /// # Behavior
233    /// - Results should be consistently ordered (e.g., by resource ID)
234    /// - If `offset` exceeds the total count, an empty vector should be returned
235    /// - If `limit` is 0, an empty vector should be returned
236    fn list(
237        &self,
238        prefix: StoragePrefix,
239        offset: usize,
240        limit: usize,
241    ) -> impl Future<Output = Result<Vec<(StorageKey, Value)>, Self::Error>> + Send;
242
243    /// Find resources by a specific attribute value.
244    ///
245    /// # Arguments
246    /// * `prefix` - The storage prefix (tenant + resource type)
247    /// * `attribute` - The JSON path of the attribute to search (e.g., "userName", "emails.0.value")
248    /// * `value` - The exact value to match
249    ///
250    /// # Returns
251    /// A vector of (key, data) pairs for matching resources.
252    ///
253    /// # Behavior
254    /// - Performs exact string matching on the specified attribute
255    /// - Supports nested attributes using dot notation
256    /// - Returns all matching resources (no pagination)
257    /// - Empty vector if no matches found
258    fn find_by_attribute(
259        &self,
260        prefix: StoragePrefix,
261        attribute: &str,
262        value: &str,
263    ) -> impl Future<Output = Result<Vec<(StorageKey, Value)>, Self::Error>> + Send;
264
265    /// Check if a resource exists.
266    ///
267    /// # Arguments
268    /// * `key` - The storage key identifying the resource
269    ///
270    /// # Returns
271    /// `true` if the resource exists, `false` if it doesn't.
272    ///
273    /// # Performance Note
274    /// This should be more efficient than `get()` as it doesn't need to return data.
275    fn exists(&self, key: StorageKey) -> impl Future<Output = Result<bool, Self::Error>> + Send;
276
277    /// Count the total number of resources matching a prefix.
278    ///
279    /// # Arguments
280    /// * `prefix` - The storage prefix (tenant + resource type)
281    ///
282    /// # Returns
283    /// The total count of matching resources.
284    fn count(
285        &self,
286        prefix: StoragePrefix,
287    ) -> impl Future<Output = Result<usize, Self::Error>> + Send;
288}
289
#[cfg(test)]
mod tests {
    use super::*;

    // These tests only exercise synchronous constructors, getters and `Display`,
    // so they are plain `#[test]` functions: no async runtime is started.

    /// `StorageKey::new` stores each segment, and `Display` joins them with `/`.
    #[test]
    fn test_storage_key() {
        let key = StorageKey::new("tenant1", "User", "123");
        assert_eq!(key.tenant_id(), "tenant1");
        assert_eq!(key.resource_type(), "User");
        assert_eq!(key.resource_id(), "123");
        assert_eq!(key.to_string(), "tenant1/User/123");
    }

    /// `StorageKey::prefix` stores both segments, and `Display` joins them with `/`.
    #[test]
    fn test_storage_prefix() {
        let prefix = StorageKey::prefix("tenant1", "User");
        assert_eq!(prefix.tenant_id(), "tenant1");
        assert_eq!(prefix.resource_type(), "User");
        assert_eq!(prefix.to_string(), "tenant1/User");
    }
}