scim_server/storage/mod.rs
1//! Storage abstraction layer for SCIM resources.
2//!
3//! This module provides a clean separation between storage concerns and SCIM protocol logic.
4//! The `StorageProvider` trait defines pure data storage operations that are protocol-agnostic,
5//! allowing for pluggable storage backends while keeping SCIM-specific logic in the provider layer.
6//!
7//! # Architecture
8//!
9//! The storage layer is responsible for:
10//! - Pure PUT/GET/DELETE operations on JSON data
11//! - Tenant isolation and data organization
12//! - Basic querying and filtering
13//! - Data persistence and retrieval
14//!
15//! The storage layer is NOT responsible for:
16//! - SCIM metadata generation (timestamps, versions, etc.)
17//! - SCIM validation rules
18//! - Business logic (limits, permissions, etc.)
19//! - Protocol-specific transformations
20//!
21//! # Design Philosophy
22//!
23//! This interface follows the principle that at the storage level, CREATE and UPDATE are
24//! the same operation - you're just putting data at a location. The distinction between
25//! "create" vs "update" is business logic that belongs in the SCIM provider layer.
26//!
27//! # Example Usage
28//!
29//! ```rust
30//! use scim_server::storage::{StorageProvider, StorageKey, InMemoryStorage};
31//! use serde_json::json;
32//!
33//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
34//! let storage = InMemoryStorage::new();
35//!
36//! // Put a resource (works for both create and update)
37//! let key = StorageKey::new("tenant1", "User", "123");
38//! let user_data = json!({
39//! "id": "123",
40//! "userName": "john.doe",
41//! "displayName": "John Doe"
42//! });
43//! let stored_data = storage.put(key.clone(), user_data).await?;
44//!
45//! // Get the resource
46//! let retrieved = storage.get(key.clone()).await?;
47//! assert!(retrieved.is_some());
48//!
49//! // Delete the resource
50//! let was_deleted = storage.delete(key).await?;
51//! assert!(was_deleted);
52//! # Ok(())
53//! # }
54//! ```
55
56pub mod errors;
57pub mod in_memory;
58pub mod sqlite;
59
60#[cfg(test)]
61pub mod tests;
62
63pub use errors::StorageError;
64pub use in_memory::InMemoryStorage;
65pub use sqlite::SqliteStorage;
66
67use serde_json::Value;
68use std::fmt;
69use std::future::Future;
70
/// Statistics about storage usage.
///
/// This is the value yielded by [`StorageProvider::stats`]. All three counters
/// are plain `usize` totals; the derived [`Default`] gives the all-zero
/// snapshot, which is convenient as a starting value when accumulating counts.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct StorageStats {
    /// Number of tenants with data
    pub tenant_count: usize,
    /// Number of resource types across all tenants
    pub resource_type_count: usize,
    /// Total number of individual resources
    pub total_resources: usize,
}
81
/// Statistics about provider usage (moved from obsolete in_memory module).
///
/// This provides metrics about resource counts, tenants, and resource types
/// for monitoring and debugging purposes.
///
/// `Default` is derived: every counter starts at zero and `resource_types`
/// starts as an empty vector, which is exactly what [`ProviderStats::new`]
/// returns. `PartialEq`/`Eq` are derived so snapshots can be compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ProviderStats {
    /// Number of active tenants in the provider
    pub tenant_count: usize,
    /// Total number of resources across all tenants
    pub total_resources: usize,
    /// Number of distinct resource types
    pub resource_type_count: usize,
    /// List of resource type names
    pub resource_types: Vec<String>,
}

impl ProviderStats {
    /// Create new empty statistics.
    ///
    /// Equivalent to [`ProviderStats::default`]: all counters are `0` and
    /// `resource_types` is empty.
    pub fn new() -> Self {
        Self::default()
    }

    /// Check if the provider is empty (no resources).
    ///
    /// Only `total_resources` is consulted; the other fields do not influence
    /// the result.
    pub fn is_empty(&self) -> bool {
        self.total_resources == 0
    }
}
120
121/// A hierarchical key for identifying resources in storage.
122///
123/// Resources are organized as: `tenant_id` → `resource_type` → `resource_id`
124/// This provides natural tenant isolation and efficient querying.
125#[derive(Debug, Clone, PartialEq, Eq, Hash)]
126pub struct StorageKey {
127 tenant_id: String,
128 resource_type: String,
129 resource_id: String,
130}
131
132impl StorageKey {
133 /// Create a new storage key.
134 pub fn new(
135 tenant_id: impl Into<String>,
136 resource_type: impl Into<String>,
137 resource_id: impl Into<String>,
138 ) -> Self {
139 Self {
140 tenant_id: tenant_id.into(),
141 resource_type: resource_type.into(),
142 resource_id: resource_id.into(),
143 }
144 }
145
146 /// Get the tenant ID.
147 pub fn tenant_id(&self) -> &str {
148 &self.tenant_id
149 }
150
151 /// Get the resource type.
152 pub fn resource_type(&self) -> &str {
153 &self.resource_type
154 }
155
156 /// Get the resource ID.
157 pub fn resource_id(&self) -> &str {
158 &self.resource_id
159 }
160
161 /// Create a prefix key for listing resources of a type within a tenant.
162 pub fn prefix(tenant_id: impl Into<String>, resource_type: impl Into<String>) -> StoragePrefix {
163 StoragePrefix {
164 tenant_id: tenant_id.into(),
165 resource_type: resource_type.into(),
166 }
167 }
168}
169
170impl fmt::Display for StorageKey {
171 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
172 write!(
173 f,
174 "{}/{}/{}",
175 self.tenant_id, self.resource_type, self.resource_id
176 )
177 }
178}
179
/// A prefix for querying resources by tenant and type.
///
/// Values are built with [`StorageKey::prefix`]. The type derives `Hash` in
/// addition to `Eq`, matching [`StorageKey`], so a prefix can be used as a key
/// in hash-based collections. The `Display` form is `tenant_id/resource_type`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct StoragePrefix {
    tenant_id: String,
    resource_type: String,
}

impl StoragePrefix {
    /// Get the tenant ID.
    pub fn tenant_id(&self) -> &str {
        &self.tenant_id
    }

    /// Get the resource type.
    pub fn resource_type(&self) -> &str {
        &self.resource_type
    }
}

impl fmt::Display for StoragePrefix {
    /// Writes `tenant_id/resource_type`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}/{}", self.tenant_id, self.resource_type)
    }
}
204
/// Core trait for storage providers that handle pure data persistence operations.
///
/// This trait defines a protocol-agnostic interface for storing and retrieving JSON data
/// with tenant isolation. Implementations should focus solely on data persistence and
/// retrieval without any SCIM-specific logic.
///
/// # Design Principles
///
/// - **PUT/GET/DELETE Model**: Simple, fundamental operations
/// - **PUT Returns Data**: Supports SCIM requirement to return resource state after operations
/// - **DELETE Returns Boolean**: Indicates whether resource existed (for proper HTTP status codes)
/// - **Tenant Isolation**: All operations are scoped to a specific tenant via StorageKey
/// - **Protocol Agnostic**: No awareness of SCIM structures or semantics
/// - **Async First**: All operations return futures for scalability
/// - **Error Transparency**: Storage errors are clearly separated from protocol errors
///
/// # Key Design Decisions
///
/// - **No separate CREATE/UPDATE**: Both are just PUT operations. Business logic determines
///   whether this should be treated as create vs update.
/// - **PUT returns stored data**: This enables SCIM providers to return the complete resource
///   state after modifications without a separate GET call.
/// - **DELETE returns boolean**: Allows proper HTTP status code handling (204 vs 404).
///
/// # Implementation Notes
///
/// - Implementors must be `Send + Sync` (supertrait bounds).
/// - Every method is declared as returning `impl Future<...> + Send` rather than as an
///   `async fn`, so the future returned by each method is required to be `Send`.
/// - Every method takes `&self`; none takes `&mut self`.
pub trait StorageProvider: Send + Sync {
    /// The error type returned by storage operations.
    ///
    /// Must implement [`std::error::Error`] and be `Send + Sync + 'static`.
    type Error: std::error::Error + Send + Sync + 'static;

    /// Store data at the specified key and return the stored data.
    ///
    /// # Arguments
    /// * `key` - The storage key identifying the resource location
    /// * `data` - The JSON data to store
    ///
    /// # Returns
    /// The data that was actually stored (may include storage-level metadata).
    ///
    /// # Errors
    /// Returns `Self::Error` if the storage operation fails.
    ///
    /// # Behavior
    /// - If a resource with the same key already exists, it is completely replaced
    /// - The storage implementation should ensure atomic operations where possible
    /// - No validation is performed on the data structure
    /// - The returned data should be exactly what would be retrieved by `get()`
    fn put(
        &self,
        key: StorageKey,
        data: Value,
    ) -> impl Future<Output = Result<Value, Self::Error>> + Send;

    /// Retrieve data by key.
    ///
    /// # Arguments
    /// * `key` - The storage key identifying the resource
    ///
    /// # Returns
    /// `Some(data)` if the resource exists, `None` if it doesn't exist.
    ///
    /// # Errors
    /// Returns `Self::Error` if the storage operation fails. A missing resource is
    /// reported as `Ok(None)`, not as an error.
    fn get(
        &self,
        key: StorageKey,
    ) -> impl Future<Output = Result<Option<Value>, Self::Error>> + Send;

    /// Delete data by key.
    ///
    /// # Arguments
    /// * `key` - The storage key identifying the resource
    ///
    /// # Returns
    /// `true` if the resource was deleted, `false` if it didn't exist.
    ///
    /// # Errors
    /// Returns `Self::Error` if the storage operation fails. A missing resource is
    /// reported as `Ok(false)`, not as an error.
    ///
    /// # Note
    /// This follows SCIM/HTTP semantics where DELETE operations don't return resource data.
    /// The boolean return value allows proper HTTP status code selection (204 vs 404).
    fn delete(&self, key: StorageKey) -> impl Future<Output = Result<bool, Self::Error>> + Send;

    /// List resources matching a prefix with pagination.
    ///
    /// # Arguments
    /// * `prefix` - The storage prefix (tenant + resource type)
    /// * `offset` - The number of resources to skip (0-based)
    /// * `limit` - The maximum number of resources to return
    ///
    /// # Returns
    /// A vector of (key, data) pairs.
    ///
    /// # Errors
    /// Returns `Self::Error` if the storage operation fails.
    ///
    /// # Behavior
    /// - Results should be consistently ordered (e.g., by resource ID)
    /// - If `offset` exceeds the total count, an empty vector should be returned
    /// - If `limit` is 0, an empty vector should be returned
    fn list(
        &self,
        prefix: StoragePrefix,
        offset: usize,
        limit: usize,
    ) -> impl Future<Output = Result<Vec<(StorageKey, Value)>, Self::Error>> + Send;

    /// Find resources by a specific attribute value.
    ///
    /// # Arguments
    /// * `prefix` - The storage prefix (tenant + resource type)
    /// * `attribute` - The JSON path of the attribute to search (e.g., "userName", "emails.0.value")
    /// * `value` - The exact value to match
    ///
    /// # Returns
    /// A vector of (key, data) pairs for matching resources.
    ///
    /// # Errors
    /// Returns `Self::Error` if the storage operation fails. Finding no match is
    /// reported as an empty vector, not as an error.
    ///
    /// # Behavior
    /// - Performs exact string matching on the specified attribute
    /// - Supports nested attributes using dot notation
    /// - Returns all matching resources (no pagination)
    /// - Empty vector if no matches found
    fn find_by_attribute(
        &self,
        prefix: StoragePrefix,
        attribute: &str,
        value: &str,
    ) -> impl Future<Output = Result<Vec<(StorageKey, Value)>, Self::Error>> + Send;

    /// Check if a resource exists.
    ///
    /// # Arguments
    /// * `key` - The storage key identifying the resource
    ///
    /// # Returns
    /// `true` if the resource exists, `false` if it doesn't.
    ///
    /// # Errors
    /// Returns `Self::Error` if the storage operation fails.
    ///
    /// # Performance Note
    /// This should be more efficient than `get()` as it doesn't need to return data.
    fn exists(&self, key: StorageKey) -> impl Future<Output = Result<bool, Self::Error>> + Send;

    /// Count the total number of resources matching a prefix.
    ///
    /// # Arguments
    /// * `prefix` - The storage prefix (tenant + resource type)
    ///
    /// # Returns
    /// The total count of matching resources.
    ///
    /// # Errors
    /// Returns `Self::Error` if the storage operation fails.
    fn count(
        &self,
        prefix: StoragePrefix,
    ) -> impl Future<Output = Result<usize, Self::Error>> + Send;

    /// List all tenant IDs that currently have data in storage.
    ///
    /// Returns tenant IDs for all tenants that contain at least one resource of any type.
    /// This method enables dynamic tenant discovery without requiring hardcoded tenant patterns.
    ///
    /// # Returns
    ///
    /// A vector of tenant ID strings. Empty vector if no tenants have data.
    ///
    /// # Errors
    ///
    /// Returns storage-specific errors if the discovery operation fails.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
    ///
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let storage = InMemoryStorage::new();
    /// let tenants = storage.list_tenants().await?;
    /// println!("Found {} tenants", tenants.len());
    /// # Ok(())
    /// # }
    /// ```
    fn list_tenants(&self) -> impl Future<Output = Result<Vec<String>, Self::Error>> + Send;

    /// List all resource types for a specific tenant.
    ///
    /// Returns resource type names (e.g., "User", "Group") that exist within the specified
    /// tenant. Only resource types with at least one stored resource are included.
    ///
    /// # Arguments
    ///
    /// * `tenant_id` - The tenant ID to query for resource types
    ///
    /// # Returns
    ///
    /// A vector of resource type strings. Empty vector if tenant doesn't exist or has no resources.
    ///
    /// # Errors
    ///
    /// Returns storage-specific errors if the query operation fails.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
    ///
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let storage = InMemoryStorage::new();
    /// let types = storage.list_resource_types("tenant1").await?;
    /// for resource_type in types {
    ///     println!("Tenant has resource type: {}", resource_type);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    fn list_resource_types(
        &self,
        tenant_id: &str,
    ) -> impl Future<Output = Result<Vec<String>, Self::Error>> + Send;

    /// List all resource types across all tenants.
    ///
    /// Returns a deduplicated collection of all resource type names found across all tenants
    /// in storage. This provides a global view of resource types without tenant boundaries.
    ///
    /// # Returns
    ///
    /// A vector of unique resource type strings. Empty vector if no resources exist.
    ///
    /// # Errors
    ///
    /// Returns storage-specific errors if the discovery operation fails.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
    ///
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let storage = InMemoryStorage::new();
    /// let all_types = storage.list_all_resource_types().await?;
    /// println!("System supports {} resource types", all_types.len());
    /// # Ok(())
    /// # }
    /// ```
    fn list_all_resource_types(
        &self,
    ) -> impl Future<Output = Result<Vec<String>, Self::Error>> + Send;

    /// Clear all data from storage.
    ///
    /// Removes all resources from all tenants, effectively resetting the storage to an empty state.
    /// This operation is primarily intended for testing scenarios and should be used with caution
    /// in production environments.
    ///
    /// # Returns
    ///
    /// `Ok(())` on successful clearing, or a storage-specific error on failure.
    ///
    /// # Errors
    ///
    /// Returns storage-specific errors if the clear operation fails partially or completely.
    ///
    /// # Behavior
    ///
    /// - Removes all resources from all tenants atomically where possible
    /// - After successful clearing, [`list_tenants`] should return an empty vector
    /// - Primarily intended for testing scenarios
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
    ///
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let storage = InMemoryStorage::new();
    /// // ... populate storage with data ...
    /// storage.clear().await?;
    /// let tenants = storage.list_tenants().await?;
    /// assert_eq!(tenants.len(), 0);
    /// # Ok(())
    /// # }
    /// ```
    ///
    /// [`list_tenants`]: Self::list_tenants
    fn clear(&self) -> impl Future<Output = Result<(), Self::Error>> + Send;

    /// Get storage statistics for debugging and monitoring.
    ///
    /// Returns statistics about storage usage including tenant count, resource type count,
    /// and total number of resources across all tenants.
    ///
    /// # Returns
    ///
    /// A `StorageStats` struct containing usage metrics.
    ///
    /// # Errors
    ///
    /// Returns storage-specific errors if the stats collection operation fails.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
    ///
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let storage = InMemoryStorage::new();
    /// let stats = storage.stats().await?;
    /// println!("Total resources: {}", stats.total_resources);
    /// # Ok(())
    /// # }
    /// ```
    fn stats(&self) -> impl Future<Output = Result<StorageStats, Self::Error>> + Send;
}