redisson 0.1.0

A Redis-based distributed synchronization and data structures library for Rust
/*
 *  Copyright (c) 2018-2025, SnackCloud. All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *  Redistributions of source code must retain the above copyright notice,
 *  this list of conditions and the following disclaimer.
 *  Redistributions in binary form must reproduce the above copyright
 *  notice, this list of conditions and the following disclaimer in the
 *  documentation and/or other materials provided with the distribution.
 *  Neither the name of the www.snackcloud.cn developer nor the names of its
 *  contributors may be used to endorse or promote products derived from
 *  this software without specific prior written permission.
 *
 *  Author: SnackCloud
 */
use std::sync::Arc;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use redis::AsyncTypedCommands;
use tokio::sync::Mutex as TokioMutex;
use tokio::time::sleep;
use tracing::{debug, info};
use uuid::Uuid;
use crate::{scripts, AsyncRedisConnectionManager, AsyncNetworkLatencyStats, RedissonError, RedissonResult};
use crate::lock::RedLockLocalState;

/// === AsyncRRedLock (asynchronous RedLock) ===
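///
/// A minimal usage sketch. The `AsyncRedisConnectionManager::new(url)`
/// constructor shown here is hypothetical (its real signature may differ),
/// and the example needs live Redis nodes, so it is not run as a doctest:
///
/// ```ignore
/// use std::sync::Arc;
/// use std::time::Duration;
///
/// let managers = vec![
///     Arc::new(AsyncRedisConnectionManager::new("redis://127.0.0.1:6379")?), // hypothetical ctor
///     Arc::new(AsyncRedisConnectionManager::new("redis://127.0.0.1:6380")?),
///     Arc::new(AsyncRedisConnectionManager::new("redis://127.0.0.1:6381")?),
/// ];
///
/// let lock = AsyncRRedLock::new(managers, "orders:lock".to_string(), Duration::from_secs(30));
/// lock.lock().await?;
/// // ... critical section ...
/// lock.unlock().await?;
/// ```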
pub struct AsyncRRedLock {
    connection_managers: Vec<Arc<AsyncRedisConnectionManager>>,
    name: String,
    lease_time: Duration,
    drift_factor: f64,
    local_state: Arc<TokioMutex<RedLockLocalState>>,
    latency_stats: Arc<AsyncNetworkLatencyStats>,
}

impl AsyncRRedLock {
    pub fn new(
        connection_managers: Vec<Arc<AsyncRedisConnectionManager>>,
        name: String,
        lease_time: Duration,
    ) -> Self {
        Self {
            connection_managers,
            name,
            lease_time,
            drift_factor: 0.01,
            local_state: Arc::new(TokioMutex::new(RedLockLocalState {
                lock_value: None,
                acquired_at: None,
                acquired_nodes: Vec::new(),
            })),
            latency_stats: Arc::new(AsyncNetworkLatencyStats::new(100)),
        }
    }

    pub fn with_drift_factor(mut self, drift_factor: f64) -> Self {
        self.drift_factor = drift_factor;
        self
    }

    /// Acquire the lock asynchronously with default retries (3 attempts, 200ms apart)
    pub async fn lock(&self) -> RedissonResult<()> {
        let stats = self.latency_stats.get_stats().await;
        if stats.count < 3 {
            debug!("Automatically warm up network latency measurements...");
            self.warmup_latency_measurement(5).await;
        }
        self.lock_with_retries(3, Duration::from_millis(200)).await
    }

    /// Asynchronous lock acquisition with retry
    pub async fn lock_with_retries(&self, max_retries: u32, retry_delay: Duration) -> RedissonResult<()> {
        for attempt in 0..max_retries {
            match self.try_lock_once().await {
                Ok(()) => return Ok(()),
                Err(e) => {
                    if attempt == max_retries - 1 {
                        return Err(e);
                    }
                    sleep(retry_delay).await;
                }
            }
        }
        Err(RedissonError::LockAcquisitionError)
    }

    /// Single-attempt lock acquisition (asynchronous)
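    ///
    /// This follows the RedLock recipe: the lock is requested on every node in
    /// parallel with the same random value, the total elapsed time is measured,
    /// and the acquisition only counts if a quorum of nodes succeeded *and* the
    /// remaining validity time is positive; otherwise the partial locks are
    /// released.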
    async fn try_lock_once(&self) -> RedissonResult<()> {
        // Check if the lock is already held
        {
            let state = self.local_state.lock().await;
            if state.lock_value.is_some() {
                return Ok(());
            }
        }

        let quorum = self.calculate_quorum();
        let lock_value = Uuid::new_v4().to_string();
        let mut successes = 0;
        let mut acquired_nodes = Vec::new();
        let start_time = Instant::now();

        // Attempt to acquire the lock on all nodes in parallel
        let mut tasks = Vec::new();
        for (i, manager) in self.connection_managers.iter().enumerate() {
            let manager = manager.clone();
            let name = self.name.clone();
            let lock_value = lock_value.clone();
            let lease_time = self.lease_time;

            tasks.push(tokio::spawn(async move {
                match manager.get_connection().await {
                    Ok(mut conn) => {
                        let result: Result<i32, _> = scripts::LOCK_SCRIPT
                            .key(&name)
                            .arg(&lock_value)
                            .arg(lease_time.as_millis() as i64)
                            .invoke_async(&mut conn)
                            .await;

                        match result {
                            Ok(acquired) if acquired > 0 => Some((i, true)),
                            _ => Some((i, false)),
                        }
                    }
                    Err(_) => Some((i, false)),
                }
            }));
        }

        // Collecting results
        for task in tasks {
            if let Ok(Some((i, success))) = task.await {
                if success {
                    successes += 1;
                    acquired_nodes.push(i);
                }
            }
        }

        // Compute the validity time
        let total_elapsed = start_time.elapsed();

        // Use the refined validity-time calculation
        let validity_time = self.calculate_validity_time(total_elapsed, successes).await;

        // Determine success
        if successes >= quorum && validity_time.as_millis() > 0 {
            // Save the local state
            let mut state = self.local_state.lock().await;
            state.lock_value = Some(lock_value.clone());
            state.acquired_at = Some(start_time);
            state.acquired_nodes = acquired_nodes;

            // Start an asynchronous renewal task
            if self.lease_time.as_secs() > 0 {
                self.start_async_renewal_task(lock_value, validity_time).await;
            }

            Ok(())
        } else {
            // Clean up
            self.cleanup_partial_locks_async(&lock_value, &acquired_nodes).await;

            if successes < quorum {
                Err(RedissonError::LockAcquisitionError)
            } else {
                Err(RedissonError::TimeoutError)
            }
        }
    }

    /// Release the lock asynchronously
    pub async fn unlock(&self) -> RedissonResult<bool> {
        let (lock_value, acquired_nodes) = {
            let mut state = self.local_state.lock().await;
            let lock_value = state.lock_value.take();
            let acquired_nodes = std::mem::take(&mut state.acquired_nodes);
            state.acquired_at = None;
            (lock_value, acquired_nodes)
        };

        if let Some(lock_value) = lock_value {
            let mut successes = 0;

            // Release the lock on each acquired node in parallel
            let mut tasks = Vec::new();
            for &node_idx in &acquired_nodes {
                if node_idx < self.connection_managers.len() {
                    let manager = self.connection_managers[node_idx].clone();
                    let name = self.name.clone();
                    let lock_value = lock_value.clone();

                    tasks.push(tokio::spawn(async move {
                        match manager.get_connection().await {
                            Ok(mut conn) => {
                                let result: Result<i32, _> = scripts::UNLOCK_SCRIPT
                                    .key(&name)
                                    .arg(&lock_value)
                                    .invoke_async(&mut conn)
                                    .await;

                                result.unwrap_or(0) > 0
                            }
                            Err(_) => false,
                        }
                    }));
                }
            }

            // Collecting results
            for task in tasks {
                if let Ok(success) = task.await {
                    if success {
                        successes += 1;
                    }
                }
            }

            // Require a successful release on a majority of the acquired nodes
            let min_releases = (acquired_nodes.len() / 2) + 1;
            Ok(successes >= min_releases)
        } else {
            Ok(false)
        }
    }

    /// Try to acquire the lock once, without blocking on retries
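    ///
    /// Returns `Ok(true)` on success and `Ok(false)` when the quorum could not
    /// be reached or the validity window expired. A usage sketch (not run as a
    /// doctest; it requires live Redis nodes):
    ///
    /// ```ignore
    /// if lock.try_lock().await? {
    ///     // ... critical section ...
    ///     lock.unlock().await?;
    /// } else {
    ///     // another holder currently owns the lock
    /// }
    /// ```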
    pub async fn try_lock(&self) -> RedissonResult<bool> {
        match self.try_lock_once().await {
            Ok(()) => Ok(true),
            Err(RedissonError::LockAcquisitionError) => Ok(false),
            Err(RedissonError::TimeoutError) => Ok(false),
            Err(e) => Err(e),
        }
    }

    /// Asynchronously clean up partially successful locks
    async fn cleanup_partial_locks_async(&self, lock_value: &str, acquired_nodes: &[usize]) {
        let mut tasks = Vec::new();

        for &node_idx in acquired_nodes {
            if node_idx < self.connection_managers.len() {
                let manager = self.connection_managers[node_idx].clone();
                let name = self.name.clone();
                let lock_value = lock_value.to_string();

                tasks.push(tokio::spawn(async move {
                    if let Ok(mut conn) = manager.get_connection().await {
                        let _ = scripts::UNLOCK_SCRIPT
                            .key(&name)
                            .arg(&lock_value)
                            .invoke_async::<i32>(&mut conn)
                            .await;
                    }
                }));
            }
        }

        for task in tasks {
            let _ = task.await;
        }
    }

    /// Start an asynchronous renewal task
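    ///
    /// The task wakes every `initial_validity / 3`, re-runs the renew script on
    /// all nodes in parallel, and stops itself once a renewal round fails to
    /// reach quorum.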
    async fn start_async_renewal_task(&self, lock_value: String, initial_validity: Duration) {
        let connection_managers = self.connection_managers.clone();
        let name = self.name.clone();
        let lease_time = self.lease_time;
        let renew_interval = initial_validity / 3;

        tokio::spawn(async move {
            let mut is_running = true;

            while is_running {
                sleep(renew_interval).await;

                // Parallel renewal
                let mut tasks = Vec::new();
                for manager in &connection_managers {
                    let manager = manager.clone();
                    let name = name.clone();
                    let lock_value = lock_value.clone();

                    tasks.push(tokio::spawn(async move {
                        match manager.get_connection().await {
                            Ok(mut conn) => {
                                let result: Result<i32, _> = scripts::RENEW_SCRIPT
                                    .key(&name)
                                    .arg(&lock_value)
                                    .arg(lease_time.as_millis() as i64)
                                    .invoke_async(&mut conn)
                                    .await;

                                result.unwrap_or(0) > 0
                            }
                            Err(_) => false,
                        }
                    }));
                }

                // Collecting results
                let mut successes = 0;
                for task in tasks {
                    if let Ok(success) = task.await {
                        if success {
                            successes += 1;
                        }
                    }
                }

                // Check if the renewal was successful
                let quorum = (connection_managers.len() / 2) + 1;
                if successes < quorum {
                    is_running = false;
                }
            }
        });
    }

    /// Calculate the quorum (a strict majority of the configured nodes)
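    ///
    /// Integer division yields a strict majority: 3 nodes -> 2, 4 nodes -> 3,
    /// 5 nodes -> 3.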
    fn calculate_quorum(&self) -> usize {
        let n = self.connection_managers.len();
        n / 2 + 1
    }

    /// Get the number of healthy nodes
    pub async fn healthy_node_count(&self) -> usize {
        let mut healthy_count = 0;
        for connector in self.connection_managers.iter() {
            if connector.health_check().await {
                healthy_count += 1;
            }
        }
        healthy_count
    }
    
    /// Calculate a more accurate validity time for an acquired lock
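    ///
    /// A sketch of the calculation performed below (every term saturates at zero):
    ///
    /// ```text
    /// base     = lease_time - elapsed
    /// drift    = drift_factor * lease_time
    /// validity = base - (drift + network_margin + health_penalty + 2ms)
    /// ```
    ///
    /// For example, with a 30s lease, 50ms elapsed, and the default 0.01 drift
    /// factor, drift is 300ms, so the validity is roughly 29.65s minus the
    /// measured network margin.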
    async fn calculate_validity_time(&self, elapsed: Duration, acquired_nodes: usize) -> Duration {
        let quorum = self.calculate_quorum();

        if acquired_nodes < quorum {
            return Duration::from_secs(0);
        }

        // Base validity time
        let basic_validity = if elapsed < self.lease_time {
            self.lease_time - elapsed
        } else {
            Duration::from_secs(0)
        };

        if basic_validity == Duration::from_secs(0) {
            return basic_validity;
        }

        // The drift factor is calculated dynamically
        let drift_factor = self.calculate_dynamic_drift_factor().await;
        let drift = Duration::from_millis(
            (drift_factor * self.lease_time.as_millis() as f64) as u64
        );

        // Network latency margin
        let network_margin = self.estimate_network_margin().await;

        // Node health compensation
        let healthy_nodes = self.healthy_node_count().await;
        let node_health_penalty = if healthy_nodes < self.connection_managers.len() {
            // Some nodes are unhealthy; widen the safety margin
            Duration::from_millis(5)
        } else {
            Duration::from_millis(0)
        };

        // Final validity time
        let total_margin = drift + network_margin + node_health_penalty + Duration::from_millis(2);

        basic_validity.checked_sub(total_margin).unwrap_or(Duration::from_secs(0))
    }


    /// Measure the average network round-trip time via PING
    async fn measure_network_rtt(&self) -> Duration {
        let mut total_rtt = Duration::from_secs(0);
        let mut successful_measurements = 0;

        for manager in &self.connection_managers {
            let start = Instant::now();

            // Execute a simple Redis command to measure the RTT
            if let Ok(mut conn) = manager.get_connection().await {
                if conn.ping().await.is_ok() {
                    let rtt = start.elapsed();
                    total_rtt += rtt;
                    successful_measurements += 1;

                    // Record this sample
                    self.latency_stats.add_sample(rtt).await;
                }
            }
        }

        if successful_measurements > 0 {
            total_rtt / successful_measurements as u32
        } else {
            // Default value when no node is reachable
            Duration::from_millis(10)
        }
    }

    /// Estimate the network latency margin from the collected statistics
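    ///
    /// In summary (matching the branches below):
    ///
    /// ```text
    /// count < 5:  margin = 50ms + 2 * current_rtt      (conservative bootstrap)
    /// otherwise:  margin = 2 * p99 + jitter / 2 + 2ms  (jitter = max - min)
    /// ```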
    async fn estimate_network_margin(&self) -> Duration {
        let stats = self.latency_stats.get_stats().await;

        if stats.count < 5 {
            // Not enough samples; fall back to a conservative estimate
            // Bootstrap value: 50ms + 2 * RTT
            let current_rtt = self.measure_network_rtt().await;
            Duration::from_millis(50) + (current_rtt * 2)
        } else {
            // Estimate from the statistics:
            // network latency margin = 2 * P99 latency + jitter compensation
            let margin = stats.p99 * 2;

            // Adjust for network jitter
            let jitter = if stats.max > stats.min {
                stats.max - stats.min
            } else {
                Duration::from_millis(0)
            };

            // Final latency margin
            margin + jitter / 2 + Duration::from_millis(2) // plus 2ms of basic slack
        }
    }

    /// Adjust the drift factor dynamically according to measured network conditions
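    ///
    /// Threshold summary (matching the branches below):
    ///
    /// ```text
    /// p99 > 100ms                -> 0.02  (high-latency network)
    /// p95 < 10ms and p99 < 20ms  -> 0.005 (stable low-latency network)
    /// otherwise                  -> 0.01
    /// ```
    ///
    /// The result is the larger of this and a clock-sync factor (0.005 when
    /// the clocks look synchronized, 0.015 otherwise).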
    async fn calculate_dynamic_drift_factor(&self) -> f64 {
        let stats = self.latency_stats.get_stats().await;

        if stats.count < 10 {
            return 0.01; // Default value
        }

        // Adjust the drift factor according to network stability
        let stability_factor: f64 = if stats.p99.as_millis() > 100 {
            // High-latency network: use a larger drift factor
            0.02
        } else if stats.p95.as_millis() < 10 && stats.p99.as_millis() < 20 {
            // Stable low-latency network: a smaller drift factor is safe
            0.005
        } else {
            // Typical network
            0.01
        };

        // Adjust according to the clock synchronization state
        let clock_sync_factor = if self.check_clock_synchronization().await {
            0.005 // Clocks are well synchronized
        } else {
            0.015 // Clocks may be out of sync
        };

        // Take the larger of the two
        stability_factor.max(clock_sync_factor)
    }

    /// Check the clock synchronization status between nodes
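    ///
    /// Each node is queried with Redis `TIME`, which returns two strings:
    /// Unix seconds and microseconds within that second. For example, a reply
    /// of `["1700000000", "123456"]` converts to the millisecond timestamp
    /// 1_700_000_000 * 1000 + 123456 / 1000 = 1_700_000_000_123.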
    async fn check_clock_synchronization(&self) -> bool {
        // If there are too few nodes, we simply return false
        if self.connection_managers.len() < 2 {
            debug!("Too few nodes for clock synchronization check");
            return false;
        }

        let mut node_times = Vec::new();
        let mut tasks = Vec::new();

        // Get the Redis TIME of all nodes in parallel
        for (i, manager) in self.connection_managers.iter().enumerate() {
            let manager = manager.clone();

            tasks.push(tokio::spawn(async move {
                match manager.get_connection().await {
                    Ok(mut conn) => {
                        // Use Redis' TIME command to get the server time
                        let result: Result<Vec<String>, redis::RedisError> =
                            redis::cmd("TIME").query_async(&mut conn).await;

                        match result {
                            Ok(time_parts) if time_parts.len() >= 2 => {
                                // Parse the returned time string
                                if let (Ok(seconds), Ok(microseconds)) = (
                                    time_parts[0].parse::<u64>(),
                                    time_parts[1].parse::<u64>()
                                ) {
                                    // Convert to a millisecond timestamp
                                    let timestamp_ms = seconds * 1000 + microseconds / 1000;
                                    Some((i, timestamp_ms))
                                } else {
                                    debug!("Failed to parse time from node {}: {:?}", i, time_parts);
                                    None
                                }
                            }
                            Ok(time_parts) => {
                                debug!("Invalid TIME response from node {}: {:?}", i, time_parts);
                                None
                            }
                            Err(e) => {
                                debug!("Failed to get time from node {}: {}", i, e);
                                None
                            }
                        }
                    }
                    Err(e) => {
                        debug!("Failed to connect to node {}: {}", i, e);
                        None
                    }
                }
            }));
        }

        // Collecting results
        for task in tasks {
            if let Ok(Some((node_idx, timestamp))) = task.await {
                node_times.push((node_idx, timestamp));
            }
        }

        // If too few node times were fetched, fall back to latency statistics
        if node_times.len() < 2 {
            debug!("Not enough time samples, falling back to latency statistics");
            return self.check_sync_via_latency_stats().await;
        }

        // Analyze the time differences between nodes
        self.analyze_time_differences(&node_times).await
    }

    /// Clock-synchronization check based on latency statistics (fallback method)
    async fn check_sync_via_latency_stats(&self) -> bool {
        let stats = self.latency_stats.get_stats().await;

        if stats.count < 5 {
            debug!("Insufficient latency data for clock sync check");
            return false;
        }

        // Use the latency variation range as a proxy for clock synchronization:
        // if latency is very stable, the clocks are probably well synchronized
        let latency_range = stats.max - stats.min;
        let avg_latency = stats.avg;

        // Clocks are considered synchronized if the latency range is below 10ms and below 30% of the average latency
        let range_to_avg_ratio = if avg_latency > Duration::from_micros(1) {
            latency_range.as_micros() as f64 / avg_latency.as_micros() as f64
        } else {
            1.0
        };

        let is_synced = latency_range < Duration::from_millis(10) &&
            range_to_avg_ratio < 0.3;

        debug!(
            "Clock sync check via latency: range={:?}, avg={:?}, ratio={:.2}, synced={}",
            latency_range, avg_latency, range_to_avg_ratio, is_synced
        );

        is_synced
    }

    /// Analyze the time differences between node timestamps
    async fn analyze_time_differences(&self, node_times: &[(usize, u64)]) -> bool {
        // Find the minimum and maximum timestamps
        let timestamps: Vec<u64> = node_times.iter().map(|(_, ts)| *ts).collect();
        let min_ts = *timestamps.iter().min().unwrap_or(&0);
        let max_ts = *timestamps.iter().max().unwrap_or(&0);

        // Calculate maximum time difference (ms)
        let max_diff_ms = max_ts.saturating_sub(min_ts);

        // Get the local time as a reference
        let local_time_ms = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_millis() as u64;

        // Calculate the average difference from the local time
        let mut total_diff = 0u64;
        let mut valid_samples = 0;

        for (_, ts) in node_times {
            let diff = ts.abs_diff(local_time_ms);
            if diff < 10000 { // Outliers with differences greater than 10 seconds are ignored
                total_diff += diff;
                valid_samples += 1;
            }
        }

        let avg_diff = if valid_samples > 0 {
            total_diff / valid_samples as u64
        } else {
            max_diff_ms
        };

        // Judgment criteria:
        // 1. The maximum difference between nodes is < 10ms
        // 2. The average difference from the local time is < 100ms
        let is_synced = max_diff_ms < 10 && avg_diff < 100;

        debug!(
            "Clock sync analysis: nodes={}, max_diff={}ms, avg_diff={}ms, synced={}",
            node_times.len(), max_diff_ms, avg_diff, is_synced
        );

        if !is_synced && max_diff_ms < 50 {
            // The difference is small but outside the strict threshold; log it for diagnostics
            debug!("Clocks slightly out of sync: max_diff={}ms", max_diff_ms);
        }

        is_synced
    }
    
    /// Warm up network latency measurements
    pub async fn warmup_latency_measurement(&self, iterations: usize) {
        info!("Start warming up network latency measurements ({} iterations)...", iterations);

        for i in 0..iterations {
            let rtt = self.measure_network_rtt().await;
            if i == 0 || (i + 1) % 10 == 0 {
                debug!("Warm up iteration {}: RTT = {:? }", i + 1, rtt);
            }
            // Short delay to avoid overload
            sleep(Duration::from_millis(10)).await;
        }

        let stats = self.latency_stats.get_stats().await;
        info!("Delayed network warmup is completed: {}", stats);
    }
}