// scim_server/storage/mod.rs

//! Storage abstraction layer for SCIM resources.
//!
//! This module provides a clean separation between storage concerns and SCIM protocol logic.
//! The `StorageProvider` trait defines pure data storage operations that are protocol-agnostic,
//! allowing for pluggable storage backends while keeping SCIM-specific logic in the provider layer.
//!
//! # Architecture
//!
//! The storage layer is responsible for:
//! - Pure PUT/GET/DELETE operations on JSON data
//! - Tenant isolation and data organization
//! - Basic querying and filtering
//! - Data persistence and retrieval
//!
//! The storage layer is NOT responsible for:
//! - SCIM metadata generation (timestamps, versions, etc.)
//! - SCIM validation rules
//! - Business logic (limits, permissions, etc.)
//! - Protocol-specific transformations
//!
//! # Design Philosophy
//!
//! This interface follows the principle that at the storage level, CREATE and UPDATE are
//! the same operation - you're just putting data at a location. The distinction between
//! "create" vs "update" is business logic that belongs in the SCIM provider layer.
//!
//! # Example Usage
//!
//! ```rust
//! use scim_server::storage::{StorageProvider, StorageKey, InMemoryStorage};
//! use serde_json::json;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let storage = InMemoryStorage::new();
//!
//! // Put a resource (works for both create and update)
//! let key = StorageKey::new("tenant1", "User", "123");
//! let user_data = json!({
//!     "id": "123",
//!     "userName": "john.doe",
//!     "displayName": "John Doe"
//! });
//! let stored_data = storage.put(key.clone(), user_data).await?;
//!
//! // Get the resource
//! let retrieved = storage.get(key.clone()).await?;
//! assert!(retrieved.is_some());
//!
//! // Delete the resource
//! let was_deleted = storage.delete(key).await?;
//! assert!(was_deleted);
//! # Ok(())
//! # }
//! ```

pub mod errors;
pub mod in_memory;
pub mod sqlite;

#[cfg(test)]
pub mod tests;

pub use errors::StorageError;
pub use in_memory::InMemoryStorage;
pub use sqlite::SqliteStorage;

use serde_json::Value;
use std::fmt;
use std::future::Future;

/// Statistics about storage usage.
///
/// A plain value snapshot of three counters. `StorageStats::default()` yields
/// a snapshot with every counter set to zero.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct StorageStats {
    /// Number of tenants with data
    pub tenant_count: usize,
    /// Number of resource types across all tenants
    pub resource_type_count: usize,
    /// Total number of individual resources
    pub total_resources: usize,
}

/// Statistics about provider usage (moved from obsolete in_memory module).
///
/// This provides metrics about resource counts, tenants, and resource types
/// for monitoring and debugging purposes.
///
/// The derived `Default` produces the same value as [`ProviderStats::new`]:
/// all counters zero and an empty `resource_types` list.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ProviderStats {
    /// Number of active tenants in the provider
    pub tenant_count: usize,
    /// Total number of resources across all tenants
    pub total_resources: usize,
    /// Number of distinct resource types
    pub resource_type_count: usize,
    /// List of resource type names
    pub resource_types: Vec<String>,
}

impl ProviderStats {
    /// Create new empty statistics.
    ///
    /// Every counter is zero and `resource_types` is empty.
    pub fn new() -> Self {
        Self::default()
    }

    /// Check if the provider is empty (no resources).
    ///
    /// Only `total_resources` is consulted; the other fields are ignored.
    pub fn is_empty(&self) -> bool {
        self.total_resources == 0
    }
}

121/// A hierarchical key for identifying resources in storage.
122///
123/// Resources are organized as: `tenant_id` → `resource_type` → `resource_id`
124/// This provides natural tenant isolation and efficient querying.
125#[derive(Debug, Clone, PartialEq, Eq, Hash)]
126pub struct StorageKey {
127    tenant_id: String,
128    resource_type: String,
129    resource_id: String,
130}
131
132impl StorageKey {
133    /// Create a new storage key.
134    pub fn new(
135        tenant_id: impl Into<String>,
136        resource_type: impl Into<String>,
137        resource_id: impl Into<String>,
138    ) -> Self {
139        Self {
140            tenant_id: tenant_id.into(),
141            resource_type: resource_type.into(),
142            resource_id: resource_id.into(),
143        }
144    }
145
146    /// Get the tenant ID.
147    pub fn tenant_id(&self) -> &str {
148        &self.tenant_id
149    }
150
151    /// Get the resource type.
152    pub fn resource_type(&self) -> &str {
153        &self.resource_type
154    }
155
156    /// Get the resource ID.
157    pub fn resource_id(&self) -> &str {
158        &self.resource_id
159    }
160
161    /// Create a prefix key for listing resources of a type within a tenant.
162    pub fn prefix(tenant_id: impl Into<String>, resource_type: impl Into<String>) -> StoragePrefix {
163        StoragePrefix {
164            tenant_id: tenant_id.into(),
165            resource_type: resource_type.into(),
166        }
167    }
168}
169
170impl fmt::Display for StorageKey {
171    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
172        write!(
173            f,
174            "{}/{}/{}",
175            self.tenant_id, self.resource_type, self.resource_id
176        )
177    }
178}
179
/// A prefix for querying resources by tenant and type.
///
/// The derived ordering compares `tenant_id` first, then `resource_type`.
/// The type is hashable, so it can serve as a map or set key.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StoragePrefix {
    tenant_id: String,
    resource_type: String,
}

impl StoragePrefix {
    /// Get the tenant ID.
    pub fn tenant_id(&self) -> &str {
        &self.tenant_id
    }

    /// Get the resource type.
    pub fn resource_type(&self) -> &str {
        &self.resource_type
    }
}

/// Formats the prefix as `tenant_id/resource_type`.
impl fmt::Display for StoragePrefix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}/{}", self.tenant_id, self.resource_type)
    }
}

205/// Core trait for storage providers that handle pure data persistence operations.
206///
207/// This trait defines a protocol-agnostic interface for storing and retrieving JSON data
208/// with tenant isolation. Implementations should focus solely on data persistence and
209/// retrieval without any SCIM-specific logic.
210///
211/// # Design Principles
212///
213/// - **PUT/GET/DELETE Model**: Simple, fundamental operations
214/// - **PUT Returns Data**: Supports SCIM requirement to return resource state after operations
215/// - **DELETE Returns Boolean**: Indicates whether resource existed (for proper HTTP status codes)
216/// - **Tenant Isolation**: All operations are scoped to a specific tenant via StorageKey
217/// - **Protocol Agnostic**: No awareness of SCIM structures or semantics
218/// - **Async First**: All operations return futures for scalability
219/// - **Error Transparency**: Storage errors are clearly separated from protocol errors
220///
221/// # Key Design Decisions
222///
223/// - **No separate CREATE/UPDATE**: Both are just PUT operations. Business logic determines
224///   whether this should be treated as create vs update.
225/// - **PUT returns stored data**: This enables SCIM providers to return the complete resource
226///   state after modifications without a separate GET call.
227/// - **DELETE returns boolean**: Allows proper HTTP status code handling (204 vs 404).
228pub trait StorageProvider: Send + Sync {
229    /// The error type returned by storage operations.
230    type Error: std::error::Error + Send + Sync + 'static;
231
232    /// Store data at the specified key and return the stored data.
233    ///
234    /// # Arguments
235    /// * `key` - The storage key identifying the resource location
236    /// * `data` - The JSON data to store
237    ///
238    /// # Returns
239    /// The data that was actually stored (may include storage-level metadata).
240    ///
241    /// # Behavior
242    /// - If a resource with the same key already exists, it is completely replaced
243    /// - The storage implementation should ensure atomic operations where possible
244    /// - No validation is performed on the data structure
245    /// - The returned data should be exactly what would be retrieved by `get()`
246    fn put(
247        &self,
248        key: StorageKey,
249        data: Value,
250    ) -> impl Future<Output = Result<Value, Self::Error>> + Send;
251
252    /// Retrieve data by key.
253    ///
254    /// # Arguments
255    /// * `key` - The storage key identifying the resource
256    ///
257    /// # Returns
258    /// `Some(data)` if the resource exists, `None` if it doesn't exist.
259    fn get(
260        &self,
261        key: StorageKey,
262    ) -> impl Future<Output = Result<Option<Value>, Self::Error>> + Send;
263
264    /// Delete data by key.
265    ///
266    /// # Arguments
267    /// * `key` - The storage key identifying the resource
268    ///
269    /// # Returns
270    /// `true` if the resource was deleted, `false` if it didn't exist.
271    ///
272    /// # Note
273    /// This follows SCIM/HTTP semantics where DELETE operations don't return resource data.
274    /// The boolean return value allows proper HTTP status code selection (204 vs 404).
275    fn delete(&self, key: StorageKey) -> impl Future<Output = Result<bool, Self::Error>> + Send;
276
277    /// List resources matching a prefix with pagination.
278    ///
279    /// # Arguments
280    /// * `prefix` - The storage prefix (tenant + resource type)
281    /// * `offset` - The number of resources to skip (0-based)
282    /// * `limit` - The maximum number of resources to return
283    ///
284    /// # Returns
285    /// A vector of (key, data) pairs.
286    ///
287    /// # Behavior
288    /// - Results should be consistently ordered (e.g., by resource ID)
289    /// - If `offset` exceeds the total count, an empty vector should be returned
290    /// - If `limit` is 0, an empty vector should be returned
291    fn list(
292        &self,
293        prefix: StoragePrefix,
294        offset: usize,
295        limit: usize,
296    ) -> impl Future<Output = Result<Vec<(StorageKey, Value)>, Self::Error>> + Send;
297
298    /// Find resources by a specific attribute value.
299    ///
300    /// # Arguments
301    /// * `prefix` - The storage prefix (tenant + resource type)
302    /// * `attribute` - The JSON path of the attribute to search (e.g., "userName", "emails.0.value")
303    /// * `value` - The exact value to match
304    ///
305    /// # Returns
306    /// A vector of (key, data) pairs for matching resources.
307    ///
308    /// # Behavior
309    /// - Performs exact string matching on the specified attribute
310    /// - Supports nested attributes using dot notation
311    /// - Returns all matching resources (no pagination)
312    /// - Empty vector if no matches found
313    fn find_by_attribute(
314        &self,
315        prefix: StoragePrefix,
316        attribute: &str,
317        value: &str,
318    ) -> impl Future<Output = Result<Vec<(StorageKey, Value)>, Self::Error>> + Send;
319
320    /// Check if a resource exists.
321    ///
322    /// # Arguments
323    /// * `key` - The storage key identifying the resource
324    ///
325    /// # Returns
326    /// `true` if the resource exists, `false` if it doesn't.
327    ///
328    /// # Performance Note
329    /// This should be more efficient than `get()` as it doesn't need to return data.
330    fn exists(&self, key: StorageKey) -> impl Future<Output = Result<bool, Self::Error>> + Send;
331
332    /// Count the total number of resources matching a prefix.
333    ///
334    /// # Arguments
335    /// * `prefix` - The storage prefix (tenant + resource type)
336    ///
337    /// # Returns
338    /// The total count of matching resources.
339    fn count(
340        &self,
341        prefix: StoragePrefix,
342    ) -> impl Future<Output = Result<usize, Self::Error>> + Send;
343
344    /// List all tenant IDs that currently have data in storage.
345    ///
346    /// Returns tenant IDs for all tenants that contain at least one resource of any type.
347    /// This method enables dynamic tenant discovery without requiring hardcoded tenant patterns.
348    ///
349    /// # Returns
350    ///
351    /// A vector of tenant ID strings. Empty vector if no tenants have data.
352    ///
353    /// # Errors
354    ///
355    /// Returns storage-specific errors if the discovery operation fails.
356    ///
357    /// # Examples
358    ///
359    /// ```rust
360    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
361    ///
362    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
363    /// let storage = InMemoryStorage::new();
364    /// let tenants = storage.list_tenants().await?;
365    /// println!("Found {} tenants", tenants.len());
366    /// # Ok(())
367    /// # }
368    /// ```
369    fn list_tenants(&self) -> impl Future<Output = Result<Vec<String>, Self::Error>> + Send;
370
371    /// List all resource types for a specific tenant.
372    ///
373    /// Returns resource type names (e.g., "User", "Group") that exist within the specified
374    /// tenant. Only resource types with at least one stored resource are included.
375    ///
376    /// # Arguments
377    ///
378    /// * `tenant_id` - The tenant ID to query for resource types
379    ///
380    /// # Returns
381    ///
382    /// A vector of resource type strings. Empty vector if tenant doesn't exist or has no resources.
383    ///
384    /// # Errors
385    ///
386    /// Returns storage-specific errors if the query operation fails.
387    ///
388    /// # Examples
389    ///
390    /// ```rust
391    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
392    ///
393    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
394    /// let storage = InMemoryStorage::new();
395    /// let types = storage.list_resource_types("tenant1").await?;
396    /// for resource_type in types {
397    ///     println!("Tenant has resource type: {}", resource_type);
398    /// }
399    /// # Ok(())
400    /// # }
401    /// ```
402    fn list_resource_types(
403        &self,
404        tenant_id: &str,
405    ) -> impl Future<Output = Result<Vec<String>, Self::Error>> + Send;
406
407    /// List all resource types across all tenants.
408    ///
409    /// Returns a deduplicated collection of all resource type names found across all tenants
410    /// in storage. This provides a global view of resource types without tenant boundaries.
411    ///
412    /// # Returns
413    ///
414    /// A vector of unique resource type strings. Empty vector if no resources exist.
415    ///
416    /// # Errors
417    ///
418    /// Returns storage-specific errors if the discovery operation fails.
419    ///
420    /// # Examples
421    ///
422    /// ```rust
423    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
424    ///
425    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
426    /// let storage = InMemoryStorage::new();
427    /// let all_types = storage.list_all_resource_types().await?;
428    /// println!("System supports {} resource types", all_types.len());
429    /// # Ok(())
430    /// # }
431    /// ```
432    fn list_all_resource_types(
433        &self,
434    ) -> impl Future<Output = Result<Vec<String>, Self::Error>> + Send;
435
436    /// Clear all data from storage.
437    ///
438    /// Removes all resources from all tenants, effectively resetting the storage to an empty state.
439    /// This operation is primarily intended for testing scenarios and should be used with caution
440    /// in production environments.
441    ///
442    /// # Returns
443    ///
444    /// `Ok(())` on successful clearing, or a storage-specific error on failure.
445    ///
446    /// # Errors
447    ///
448    /// Returns storage-specific errors if the clear operation fails partially or completely.
449    ///
450    /// # Behavior
451    ///
452    /// - Removes all resources from all tenants atomically where possible
453    /// - After successful clearing, [`list_tenants`] should return an empty vector
454    /// - Primarily intended for testing scenarios
455    ///
456    /// # Examples
457    ///
458    /// ```rust
459    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
460    ///
461    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
462    /// let storage = InMemoryStorage::new();
463    /// // ... populate storage with data ...
464    /// storage.clear().await?;
465    /// let tenants = storage.list_tenants().await?;
466    /// assert_eq!(tenants.len(), 0);
467    /// # Ok(())
468    /// # }
469    /// ```
470    ///
471    /// [`list_tenants`]: Self::list_tenants
472    fn clear(&self) -> impl Future<Output = Result<(), Self::Error>> + Send;
473
474    /// Get storage statistics for debugging and monitoring.
475    ///
476    /// Returns statistics about storage usage including tenant count, resource type count,
477    /// and total number of resources across all tenants.
478    ///
479    /// # Returns
480    ///
481    /// A `StorageStats` struct containing usage metrics.
482    ///
483    /// # Errors
484    ///
485    /// Returns storage-specific errors if the stats collection operation fails.
486    ///
487    /// # Examples
488    ///
489    /// ```rust
490    /// use scim_server::storage::{StorageProvider, InMemoryStorage};
491    ///
492    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
493    /// let storage = InMemoryStorage::new();
494    /// let stats = storage.stats().await?;
495    /// println!("Total resources: {}", stats.total_resources);
496    /// # Ok(())
497    /// # }
498    /// ```
499    fn stats(&self) -> impl Future<Output = Result<StorageStats, Self::Error>> + Send;
500}