scim_server/storage/mod.rs
1//! Storage abstraction layer for SCIM resources.
2//!
3//! This module provides a clean separation between storage concerns and SCIM protocol logic.
4//! The `StorageProvider` trait defines pure data storage operations that are protocol-agnostic,
5//! allowing for pluggable storage backends while keeping SCIM-specific logic in the provider layer.
6//!
7//! # Architecture
8//!
9//! The storage layer is responsible for:
10//! - Pure PUT/GET/DELETE operations on JSON data
11//! - Tenant isolation and data organization
12//! - Basic querying and filtering
13//! - Data persistence and retrieval
14//!
15//! The storage layer is NOT responsible for:
16//! - SCIM metadata generation (timestamps, versions, etc.)
17//! - SCIM validation rules
18//! - Business logic (limits, permissions, etc.)
19//! - Protocol-specific transformations
20//!
21//! # Design Philosophy
22//!
23//! This interface follows the principle that at the storage level, CREATE and UPDATE are
24//! the same operation - you're just putting data at a location. The distinction between
25//! "create" vs "update" is business logic that belongs in the SCIM provider layer.
26//!
27//! # Example Usage
28//!
29//! ```rust
30//! use scim_server::storage::{StorageProvider, StorageKey, InMemoryStorage};
31//! use serde_json::json;
32//!
33//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
34//! let storage = InMemoryStorage::new();
35//!
36//! // Put a resource (works for both create and update)
37//! let key = StorageKey::new("tenant1", "User", "123");
38//! let user_data = json!({
39//! "id": "123",
40//! "userName": "john.doe",
41//! "displayName": "John Doe"
42//! });
43//! let stored_data = storage.put(key.clone(), user_data).await?;
44//!
45//! // Get the resource
46//! let retrieved = storage.get(key.clone()).await?;
47//! assert!(retrieved.is_some());
48//!
49//! // Delete the resource
50//! let was_deleted = storage.delete(key).await?;
51//! assert!(was_deleted);
52//! # Ok(())
53//! # }
54//! ```
55
56pub mod errors;
57pub mod in_memory;
58
59pub use errors::StorageError;
60pub use in_memory::{InMemoryStorage, InMemoryStorageStats};
61
62use serde_json::Value;
63use std::fmt;
64use std::future::Future;
65
66/// A hierarchical key for identifying resources in storage.
67///
68/// Resources are organized as: `tenant_id` → `resource_type` → `resource_id`
69/// This provides natural tenant isolation and efficient querying.
70#[derive(Debug, Clone, PartialEq, Eq, Hash)]
71pub struct StorageKey {
72 tenant_id: String,
73 resource_type: String,
74 resource_id: String,
75}
76
77impl StorageKey {
78 /// Create a new storage key.
79 pub fn new(
80 tenant_id: impl Into<String>,
81 resource_type: impl Into<String>,
82 resource_id: impl Into<String>,
83 ) -> Self {
84 Self {
85 tenant_id: tenant_id.into(),
86 resource_type: resource_type.into(),
87 resource_id: resource_id.into(),
88 }
89 }
90
91 /// Get the tenant ID.
92 pub fn tenant_id(&self) -> &str {
93 &self.tenant_id
94 }
95
96 /// Get the resource type.
97 pub fn resource_type(&self) -> &str {
98 &self.resource_type
99 }
100
101 /// Get the resource ID.
102 pub fn resource_id(&self) -> &str {
103 &self.resource_id
104 }
105
106 /// Create a prefix key for listing resources of a type within a tenant.
107 pub fn prefix(tenant_id: impl Into<String>, resource_type: impl Into<String>) -> StoragePrefix {
108 StoragePrefix {
109 tenant_id: tenant_id.into(),
110 resource_type: resource_type.into(),
111 }
112 }
113}
114
115impl fmt::Display for StorageKey {
116 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
117 write!(
118 f,
119 "{}/{}/{}",
120 self.tenant_id, self.resource_type, self.resource_id
121 )
122 }
123}
124
/// A prefix for querying resources by tenant and type.
///
/// A prefix is the first two components of a [`StorageKey`] (`tenant_id` and
/// `resource_type`) and is obtained from [`StorageKey::prefix`].
///
/// Like [`StorageKey`], the prefix implements `Hash`, so it can be used directly as a key
/// in hash-based collections (for example to index per-tenant, per-type buckets).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct StoragePrefix {
    tenant_id: String,
    resource_type: String,
}

impl StoragePrefix {
    /// Get the tenant ID.
    pub fn tenant_id(&self) -> &str {
        &self.tenant_id
    }

    /// Get the resource type.
    pub fn resource_type(&self) -> &str {
        &self.resource_type
    }
}

impl fmt::Display for StoragePrefix {
    /// Render the prefix as `tenant_id/resource_type`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}/{}", self.tenant_id, self.resource_type)
    }
}
149
150/// Core trait for storage providers that handle pure data persistence operations.
151///
152/// This trait defines a protocol-agnostic interface for storing and retrieving JSON data
153/// with tenant isolation. Implementations should focus solely on data persistence and
154/// retrieval without any SCIM-specific logic.
155///
156/// # Design Principles
157///
158/// - **PUT/GET/DELETE Model**: Simple, fundamental operations
159/// - **PUT Returns Data**: Supports SCIM requirement to return resource state after operations
160/// - **DELETE Returns Boolean**: Indicates whether resource existed (for proper HTTP status codes)
161/// - **Tenant Isolation**: All operations are scoped to a specific tenant via StorageKey
162/// - **Protocol Agnostic**: No awareness of SCIM structures or semantics
163/// - **Async First**: All operations return futures for scalability
164/// - **Error Transparency**: Storage errors are clearly separated from protocol errors
165///
166/// # Key Design Decisions
167///
168/// - **No separate CREATE/UPDATE**: Both are just PUT operations. Business logic determines
169/// whether this should be treated as create vs update.
170/// - **PUT returns stored data**: This enables SCIM providers to return the complete resource
171/// state after modifications without a separate GET call.
172/// - **DELETE returns boolean**: Allows proper HTTP status code handling (204 vs 404).
173pub trait StorageProvider: Send + Sync {
174 /// The error type returned by storage operations.
175 type Error: std::error::Error + Send + Sync + 'static;
176
177 /// Store data at the specified key and return the stored data.
178 ///
179 /// # Arguments
180 /// * `key` - The storage key identifying the resource location
181 /// * `data` - The JSON data to store
182 ///
183 /// # Returns
184 /// The data that was actually stored (may include storage-level metadata).
185 ///
186 /// # Behavior
187 /// - If a resource with the same key already exists, it is completely replaced
188 /// - The storage implementation should ensure atomic operations where possible
189 /// - No validation is performed on the data structure
190 /// - The returned data should be exactly what would be retrieved by `get()`
191 fn put(
192 &self,
193 key: StorageKey,
194 data: Value,
195 ) -> impl Future<Output = Result<Value, Self::Error>> + Send;
196
197 /// Retrieve data by key.
198 ///
199 /// # Arguments
200 /// * `key` - The storage key identifying the resource
201 ///
202 /// # Returns
203 /// `Some(data)` if the resource exists, `None` if it doesn't exist.
204 fn get(
205 &self,
206 key: StorageKey,
207 ) -> impl Future<Output = Result<Option<Value>, Self::Error>> + Send;
208
209 /// Delete data by key.
210 ///
211 /// # Arguments
212 /// * `key` - The storage key identifying the resource
213 ///
214 /// # Returns
215 /// `true` if the resource was deleted, `false` if it didn't exist.
216 ///
217 /// # Note
218 /// This follows SCIM/HTTP semantics where DELETE operations don't return resource data.
219 /// The boolean return value allows proper HTTP status code selection (204 vs 404).
220 fn delete(&self, key: StorageKey) -> impl Future<Output = Result<bool, Self::Error>> + Send;
221
222 /// List resources matching a prefix with pagination.
223 ///
224 /// # Arguments
225 /// * `prefix` - The storage prefix (tenant + resource type)
226 /// * `offset` - The number of resources to skip (0-based)
227 /// * `limit` - The maximum number of resources to return
228 ///
229 /// # Returns
230 /// A vector of (key, data) pairs.
231 ///
232 /// # Behavior
233 /// - Results should be consistently ordered (e.g., by resource ID)
234 /// - If `offset` exceeds the total count, an empty vector should be returned
235 /// - If `limit` is 0, an empty vector should be returned
236 fn list(
237 &self,
238 prefix: StoragePrefix,
239 offset: usize,
240 limit: usize,
241 ) -> impl Future<Output = Result<Vec<(StorageKey, Value)>, Self::Error>> + Send;
242
243 /// Find resources by a specific attribute value.
244 ///
245 /// # Arguments
246 /// * `prefix` - The storage prefix (tenant + resource type)
247 /// * `attribute` - The JSON path of the attribute to search (e.g., "userName", "emails.0.value")
248 /// * `value` - The exact value to match
249 ///
250 /// # Returns
251 /// A vector of (key, data) pairs for matching resources.
252 ///
253 /// # Behavior
254 /// - Performs exact string matching on the specified attribute
255 /// - Supports nested attributes using dot notation
256 /// - Returns all matching resources (no pagination)
257 /// - Empty vector if no matches found
258 fn find_by_attribute(
259 &self,
260 prefix: StoragePrefix,
261 attribute: &str,
262 value: &str,
263 ) -> impl Future<Output = Result<Vec<(StorageKey, Value)>, Self::Error>> + Send;
264
265 /// Check if a resource exists.
266 ///
267 /// # Arguments
268 /// * `key` - The storage key identifying the resource
269 ///
270 /// # Returns
271 /// `true` if the resource exists, `false` if it doesn't.
272 ///
273 /// # Performance Note
274 /// This should be more efficient than `get()` as it doesn't need to return data.
275 fn exists(&self, key: StorageKey) -> impl Future<Output = Result<bool, Self::Error>> + Send;
276
277 /// Count the total number of resources matching a prefix.
278 ///
279 /// # Arguments
280 /// * `prefix` - The storage prefix (tenant + resource type)
281 ///
282 /// # Returns
283 /// The total count of matching resources.
284 fn count(
285 &self,
286 prefix: StoragePrefix,
287 ) -> impl Future<Output = Result<usize, Self::Error>> + Send;
288
289 /// List all tenant IDs that currently have data in storage.
290 ///
291 /// Returns tenant IDs for all tenants that contain at least one resource of any type.
292 /// This method enables dynamic tenant discovery without requiring hardcoded tenant patterns.
293 ///
294 /// # Returns
295 ///
296 /// A vector of tenant ID strings. Empty vector if no tenants have data.
297 ///
298 /// # Errors
299 ///
300 /// Returns storage-specific errors if the discovery operation fails.
301 ///
302 /// # Examples
303 ///
304 /// ```rust
305 /// use scim_server::storage::{StorageProvider, InMemoryStorage};
306 ///
307 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
308 /// let storage = InMemoryStorage::new();
309 /// let tenants = storage.list_tenants().await?;
310 /// println!("Found {} tenants", tenants.len());
311 /// # Ok(())
312 /// # }
313 /// ```
314 fn list_tenants(&self) -> impl Future<Output = Result<Vec<String>, Self::Error>> + Send;
315
316 /// List all resource types for a specific tenant.
317 ///
318 /// Returns resource type names (e.g., "User", "Group") that exist within the specified
319 /// tenant. Only resource types with at least one stored resource are included.
320 ///
321 /// # Arguments
322 ///
323 /// * `tenant_id` - The tenant ID to query for resource types
324 ///
325 /// # Returns
326 ///
327 /// A vector of resource type strings. Empty vector if tenant doesn't exist or has no resources.
328 ///
329 /// # Errors
330 ///
331 /// Returns storage-specific errors if the query operation fails.
332 ///
333 /// # Examples
334 ///
335 /// ```rust
336 /// use scim_server::storage::{StorageProvider, InMemoryStorage};
337 ///
338 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
339 /// let storage = InMemoryStorage::new();
340 /// let types = storage.list_resource_types("tenant1").await?;
341 /// for resource_type in types {
342 /// println!("Tenant has resource type: {}", resource_type);
343 /// }
344 /// # Ok(())
345 /// # }
346 /// ```
347 fn list_resource_types(
348 &self,
349 tenant_id: &str,
350 ) -> impl Future<Output = Result<Vec<String>, Self::Error>> + Send;
351
352 /// List all resource types across all tenants.
353 ///
354 /// Returns a deduplicated collection of all resource type names found across all tenants
355 /// in storage. This provides a global view of resource types without tenant boundaries.
356 ///
357 /// # Returns
358 ///
359 /// A vector of unique resource type strings. Empty vector if no resources exist.
360 ///
361 /// # Errors
362 ///
363 /// Returns storage-specific errors if the discovery operation fails.
364 ///
365 /// # Examples
366 ///
367 /// ```rust
368 /// use scim_server::storage::{StorageProvider, InMemoryStorage};
369 ///
370 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
371 /// let storage = InMemoryStorage::new();
372 /// let all_types = storage.list_all_resource_types().await?;
373 /// println!("System supports {} resource types", all_types.len());
374 /// # Ok(())
375 /// # }
376 /// ```
377 fn list_all_resource_types(&self) -> impl Future<Output = Result<Vec<String>, Self::Error>> + Send;
378
379 /// Clear all data from storage.
380 ///
381 /// Removes all resources from all tenants, effectively resetting the storage to an empty state.
382 /// This operation is primarily intended for testing scenarios and should be used with caution
383 /// in production environments.
384 ///
385 /// # Returns
386 ///
387 /// `Ok(())` on successful clearing, or a storage-specific error on failure.
388 ///
389 /// # Errors
390 ///
391 /// Returns storage-specific errors if the clear operation fails partially or completely.
392 ///
393 /// # Behavior
394 ///
395 /// - Removes all resources from all tenants atomically where possible
396 /// - After successful clearing, [`list_tenants`] should return an empty vector
397 /// - Primarily intended for testing scenarios
398 ///
399 /// # Examples
400 ///
401 /// ```rust
402 /// use scim_server::storage::{StorageProvider, InMemoryStorage};
403 ///
404 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
405 /// let storage = InMemoryStorage::new();
406 /// // ... populate storage with data ...
407 /// storage.clear().await?;
408 /// let tenants = storage.list_tenants().await?;
409 /// assert_eq!(tenants.len(), 0);
410 /// # Ok(())
411 /// # }
412 /// ```
413 ///
414 /// [`list_tenants`]: Self::list_tenants
415 fn clear(&self) -> impl Future<Output = Result<(), Self::Error>> + Send;
416}
417
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    // These tests only exercise synchronous constructors, getters and `Display`, so they
    // are plain `#[test]` functions: nothing awaits, and no async runtime is needed.

    /// A key exposes its three components and renders them slash-separated.
    #[test]
    fn test_storage_key() {
        let key = StorageKey::new("tenant1", "User", "123");
        assert_eq!(key.tenant_id(), "tenant1");
        assert_eq!(key.resource_type(), "User");
        assert_eq!(key.resource_id(), "123");
        assert_eq!(key.to_string(), "tenant1/User/123");
    }

    /// A prefix exposes tenant and resource type and renders them slash-separated.
    #[test]
    fn test_storage_prefix() {
        let prefix = StorageKey::prefix("tenant1", "User");
        assert_eq!(prefix.tenant_id(), "tenant1");
        assert_eq!(prefix.resource_type(), "User");
        assert_eq!(prefix.to_string(), "tenant1/User");
    }

    /// Keys compare and hash by value, and a different tenant yields a different key.
    #[test]
    fn test_storage_key_equality_and_hash() {
        let key = StorageKey::new("tenant1", "User", "123");
        let same = StorageKey::new(String::from("tenant1"), String::from("User"), "123");
        let other_tenant = StorageKey::new("tenant2", "User", "123");

        assert_eq!(key, same);
        assert_ne!(key, other_tenant);

        let mut seen = HashSet::new();
        assert!(seen.insert(key));
        assert!(!seen.insert(same));
        assert!(seen.insert(other_tenant));
    }
}