use std::collections::HashMap;
use std::sync::Arc;
use std::sync::RwLock;
use std::sync::atomic::{AtomicU64, Ordering};
use serde::{Deserialize, Serialize};
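/// Per-object replication state, mirroring the values S3 reports in the
/// `x-amz-replication-status` header. `Replica` marks an object that is
/// itself a replica on the destination side.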
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum ReplicationStatus {
Pending,
Completed,
Failed,
Replica,
}
impl ReplicationStatus {
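    /// Returns the uppercase wire form used in `x-amz-replication-status`.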
#[must_use]
pub fn as_aws_str(&self) -> &'static str {
match self {
Self::Pending => "PENDING",
Self::Completed => "COMPLETED",
Self::Failed => "FAILED",
Self::Replica => "REPLICA",
}
}
}
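/// Object selector for a rule: an optional key prefix plus tags that must
/// all be present on the object.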
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReplicationFilter {
pub prefix: Option<String>,
pub tags: Vec<(String, String)>,
}
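/// A single replication rule: which objects it selects (via `filter`), where
/// replicas go, and how it competes with other rules. Only rules with
/// `status_enabled` set are considered, and higher `priority` wins.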
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReplicationRule {
pub id: String,
pub priority: u32,
pub status_enabled: bool,
pub filter: ReplicationFilter,
pub destination_bucket: String,
pub destination_storage_class: Option<String>,
}
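/// A bucket's full replication configuration: the IAM-style `role` ARN plus
/// an ordered list of rules.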
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReplicationConfig {
pub role: String,
pub rules: Vec<ReplicationRule>,
}
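/// Serializable mirror of [`ReplicationManager`]'s state. `statuses` is a
/// `Vec` of pairs rather than a map because JSON object keys must be strings,
/// so a `HashMap` keyed by `(String, String)` cannot serialize to JSON
/// directly.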
#[derive(Debug, Default, Serialize, Deserialize)]
struct ReplicationSnapshot {
by_bucket: HashMap<String, ReplicationConfig>,
statuses: Vec<((String, String), ReplicationStatus)>,
}
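/// Central in-memory store for bucket replication state: per-bucket
/// configuration plus per-object replication status, shared across request
/// handlers behind an [`Arc`].
///
/// A minimal usage sketch; the import path is illustrative, hence `ignore`:
///
/// ```ignore
/// use crate::replication::{ReplicationConfig, ReplicationManager};
///
/// let mgr = ReplicationManager::new();
/// mgr.put("photos", ReplicationConfig::default());
/// assert!(mgr.get("photos").is_some());
/// mgr.delete("photos");
/// assert!(mgr.get("photos").is_none());
/// ```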
pub struct ReplicationManager {
by_bucket: RwLock<HashMap<String, ReplicationConfig>>,
statuses: RwLock<HashMap<(String, String), ReplicationStatus>>,
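    /// Objects whose replication was abandoned after exhausting the retry budget.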
pub dropped_total: AtomicU64,
}
impl Default for ReplicationManager {
fn default() -> Self {
Self::new()
}
}
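// Manual `Debug` impl: the lock-guarded maps are elided and only the drop
// counter is shown, keeping the output terse.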
impl std::fmt::Debug for ReplicationManager {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ReplicationManager")
.field("dropped_total", &self.dropped_total.load(Ordering::Relaxed))
.finish_non_exhaustive()
}
}
impl ReplicationManager {
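    /// Creates an empty manager: no configs, no statuses, zeroed drop counter.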
#[must_use]
pub fn new() -> Self {
Self {
by_bucket: RwLock::new(HashMap::new()),
statuses: RwLock::new(HashMap::new()),
dropped_total: AtomicU64::new(0),
}
}
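    /// Installs or replaces the replication configuration for `bucket`.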
pub fn put(&self, bucket: &str, config: ReplicationConfig) {
self.by_bucket
.write()
.expect("replication state RwLock poisoned")
.insert(bucket.to_owned(), config);
}
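    /// Returns a clone of `bucket`'s configuration, if one is set.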
#[must_use]
pub fn get(&self, bucket: &str) -> Option<ReplicationConfig> {
self.by_bucket
.read()
.expect("replication state RwLock poisoned")
.get(bucket)
.cloned()
}
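    /// Removes `bucket`'s configuration; deleting a missing config is a no-op.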
pub fn delete(&self, bucket: &str) {
self.by_bucket
.write()
.expect("replication state RwLock poisoned")
.remove(bucket);
}
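    /// Serializes the current configs and statuses to a JSON snapshot.
    ///
    /// The two maps are locked one after the other, so the snapshot is not a
    /// single atomic view if writers are active concurrently.
    ///
    /// # Errors
    ///
    /// Returns any `serde_json` error raised during serialization.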
pub fn to_json(&self) -> Result<String, serde_json::Error> {
let snap = ReplicationSnapshot {
by_bucket: self
.by_bucket
.read()
.expect("replication state RwLock poisoned")
.clone(),
statuses: self
.statuses
.read()
.expect("replication state RwLock poisoned")
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect(),
};
serde_json::to_string(&snap)
}
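    /// Rebuilds a manager from a snapshot produced by [`Self::to_json`].
    /// `dropped_total` is not part of the snapshot and restarts at zero.
    ///
    /// # Errors
    ///
    /// Returns any `serde_json` error raised while parsing `s`.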
pub fn from_json(s: &str) -> Result<Self, serde_json::Error> {
let snap: ReplicationSnapshot = serde_json::from_str(s)?;
Ok(Self {
by_bucket: RwLock::new(snap.by_bucket),
statuses: RwLock::new(snap.statuses.into_iter().collect()),
dropped_total: AtomicU64::new(0),
})
}
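    /// Returns the enabled rule with the highest `priority` whose filter
    /// matches `key` and `object_tags`, or `None` when the bucket has no
    /// configuration or nothing matches. Ties on priority keep the rule
    /// declared first.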
#[must_use]
pub fn match_rule(
&self,
bucket: &str,
key: &str,
object_tags: &[(String, String)],
) -> Option<ReplicationRule> {
let map = self
.by_bucket
.read()
.expect("replication state RwLock poisoned");
let cfg = map.get(bucket)?;
let mut best: Option<&ReplicationRule> = None;
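        // Linear scan: highest priority wins, and the strict `>` below means
        // a tie keeps the rule declared first.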
for rule in &cfg.rules {
if !rule.status_enabled {
continue;
}
if !filter_matches(&rule.filter, key, object_tags) {
continue;
}
best = match best {
None => Some(rule),
Some(prev) if rule.priority > prev.priority => Some(rule),
Some(prev) => Some(prev),
};
}
best.cloned()
}
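    /// Upserts the replication status for the `(bucket, key)` pair.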
pub fn record_status(&self, bucket: &str, key: &str, status: ReplicationStatus) {
self.statuses
.write()
.expect("replication state RwLock poisoned")
.insert((bucket.to_owned(), key.to_owned()), status);
}
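    /// Returns the last recorded status for the `(bucket, key)` pair, if any.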
#[must_use]
pub fn lookup_status(&self, bucket: &str, key: &str) -> Option<ReplicationStatus> {
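        // The owned tuple is unavoidable here: a `(String, String)` key cannot
        // be looked up via `(&str, &str)` without a custom `Borrow` wrapper.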
self.statuses
.read()
.expect("replication state RwLock poisoned")
.get(&(bucket.to_owned(), key.to_owned()))
.cloned()
}
}
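/// Returns `true` when `key` and `object_tags` satisfy `filter`: the key must
/// start with the prefix (an absent or empty prefix matches every key), and
/// the object must carry *all* of the filter's tags with exactly matching
/// values. Extra object tags are ignored.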
fn filter_matches(
filter: &ReplicationFilter,
key: &str,
object_tags: &[(String, String)],
) -> bool {
    // An absent or empty prefix matches every key.
    if let Some(p) = filter.prefix.as_deref() {
        if !p.is_empty() && !key.starts_with(p) {
            return false;
        }
    }
for (tk, tv) in &filter.tags {
if !object_tags
.iter()
.any(|(ok, ov)| ok == tk && ov == tv)
{
return false;
}
}
true
}
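/// Maximum delivery attempts per object before the drop counter is bumped.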
const RETRY_ATTEMPTS: u32 = 3;
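/// Base backoff delay in milliseconds; doubled after each failed attempt.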
const RETRY_BASE_MS: u64 = 50;
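/// Copies one object to the destination bucket named by `rule`, retrying up
/// to `RETRY_ATTEMPTS` times with exponential backoff, then records the
/// outcome on `manager`.
///
/// The replica's metadata is stamped with `x-amz-replication-status: REPLICA`
/// (plus `x-amz-storage-class` when the rule requests one). Success records
/// [`ReplicationStatus::Completed`]; exhausting the retry budget records
/// [`ReplicationStatus::Failed`] and bumps `dropped_total`.
///
/// A sketch of how a PUT handler might fire this off without blocking the
/// response; the surrounding names (`state`, `do_put`, ...) are illustrative,
/// hence `ignore`:
///
/// ```ignore
/// let mgr = Arc::clone(&state.replication);
/// if let Some(rule) = mgr.match_rule(&bucket, &key, &tags) {
///     tokio::spawn(replicate_object(
///         rule, bucket, key, body, metadata, do_put, mgr,
///     ));
/// }
/// ```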
pub async fn replicate_object<F, Fut>(
rule: ReplicationRule,
source_bucket: String,
source_key: String,
body: bytes::Bytes,
metadata: Option<HashMap<String, String>>,
do_put: F,
manager: Arc<ReplicationManager>,
) where
F: Fn(String, String, bytes::Bytes, Option<HashMap<String, String>>) -> Fut,
Fut: std::future::Future<Output = Result<(), String>>,
{
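    // Stamp the copy so reads on the destination report
    // `x-amz-replication-status: REPLICA`, as S3 does for replicas.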
let mut replica_meta = metadata.unwrap_or_default();
replica_meta.insert(
"x-amz-replication-status".to_owned(),
ReplicationStatus::Replica.as_aws_str().to_owned(),
);
if let Some(ref sc) = rule.destination_storage_class {
replica_meta.insert("x-amz-storage-class".to_owned(), sc.clone());
}
let dest_bucket = rule.destination_bucket.clone();
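    // Exponential backoff: 50 ms, 100 ms, 200 ms with the defaults above.
    // `Bytes::clone` is a cheap refcount bump, so retries never copy the body.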
for attempt in 0..RETRY_ATTEMPTS {
let result = do_put(
dest_bucket.clone(),
source_key.clone(),
body.clone(),
Some(replica_meta.clone()),
)
.await;
match result {
Ok(()) => {
manager.record_status(
&source_bucket,
&source_key,
ReplicationStatus::Completed,
);
crate::metrics::record_replication_replicated(&source_bucket, &dest_bucket);
tracing::debug!(
source_bucket = %source_bucket,
source_key = %source_key,
dest_bucket = %dest_bucket,
rule_id = %rule.id,
"S4 replication: COMPLETED"
);
return;
}
Err(e) => {
if attempt + 1 < RETRY_ATTEMPTS {
let delay_ms = RETRY_BASE_MS * (1u64 << attempt);
tracing::warn!(
source_bucket = %source_bucket,
source_key = %source_key,
dest_bucket = %dest_bucket,
attempt = attempt + 1,
error = %e,
"S4 replication: attempt failed, retrying"
);
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
continue;
}
manager.record_status(
&source_bucket,
&source_key,
ReplicationStatus::Failed,
);
manager.dropped_total.fetch_add(1, Ordering::Relaxed);
crate::metrics::record_replication_drop(&source_bucket);
tracing::warn!(
source_bucket = %source_bucket,
source_key = %source_key,
dest_bucket = %dest_bucket,
rule_id = %rule.id,
error = %e,
"S4 replication: FAILED after {RETRY_ATTEMPTS} attempts (drop counter bumped)"
);
return;
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Mutex;
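    /// Shorthand for building a `ReplicationRule` in tests.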
fn rule(
id: &str,
priority: u32,
enabled: bool,
prefix: Option<&str>,
tags: &[(&str, &str)],
dest: &str,
) -> ReplicationRule {
ReplicationRule {
id: id.to_owned(),
priority,
status_enabled: enabled,
filter: ReplicationFilter {
prefix: prefix.map(str::to_owned),
tags: tags
.iter()
.map(|(k, v)| ((*k).to_owned(), (*v).to_owned()))
.collect(),
},
destination_bucket: dest.to_owned(),
destination_storage_class: None,
}
}
#[test]
fn match_rule_prefix_filter_match_and_miss() {
let mgr = ReplicationManager::new();
mgr.put(
"src",
ReplicationConfig {
role: "arn:aws:iam::000:role/s4-test".into(),
rules: vec![rule("r1", 1, true, Some("logs/"), &[], "dst")],
},
);
assert!(mgr.match_rule("src", "logs/2026/01/01.log", &[]).is_some());
assert!(mgr.match_rule("src", "uploads/foo.bin", &[]).is_none());
}
#[test]
fn match_rule_no_config_for_bucket() {
let mgr = ReplicationManager::new();
assert!(mgr.match_rule("ghost", "k", &[]).is_none());
}
#[test]
fn match_rule_priority_picks_highest() {
let mgr = ReplicationManager::new();
mgr.put(
"src",
ReplicationConfig {
role: "arn".into(),
rules: vec![
rule("low", 1, true, Some(""), &[], "dst-low"),
rule("high", 10, true, Some(""), &[], "dst-high"),
rule("mid", 5, true, Some(""), &[], "dst-mid"),
],
},
);
let picked = mgr.match_rule("src", "any.bin", &[]).expect("match");
assert_eq!(picked.id, "high");
assert_eq!(picked.destination_bucket, "dst-high");
}
#[test]
fn match_rule_priority_tie_breaker_is_declaration_order() {
let mgr = ReplicationManager::new();
mgr.put(
"src",
ReplicationConfig {
role: "arn".into(),
rules: vec![
rule("first", 5, true, Some(""), &[], "dst-first"),
rule("second", 5, true, Some(""), &[], "dst-second"),
],
},
);
let picked = mgr.match_rule("src", "k", &[]).expect("match");
assert_eq!(picked.id, "first", "tie on priority must keep the earlier rule");
}
#[test]
fn match_rule_tag_filter_and_of_all_tags() {
let mgr = ReplicationManager::new();
mgr.put(
"src",
ReplicationConfig {
role: "arn".into(),
rules: vec![rule(
"r-tags",
1,
true,
None,
&[("env", "prod"), ("tier", "gold")],
"dst",
)],
},
);
assert!(
mgr.match_rule(
"src",
"k",
&[
("env".into(), "prod".into()),
("tier".into(), "gold".into()),
("extra".into(), "ignored".into())
]
)
.is_some(),
"all required tags present (extras OK) must match"
);
assert!(
mgr.match_rule(
"src",
"k",
&[("env".into(), "prod".into())]
)
.is_none(),
"missing one of the required tags must not match"
);
assert!(
mgr.match_rule(
"src",
"k",
&[
("env".into(), "dev".into()),
("tier".into(), "gold".into())
]
)
.is_none(),
"wrong value on a required tag must not match"
);
}
#[test]
fn match_rule_status_disabled_never_matches() {
let mgr = ReplicationManager::new();
mgr.put(
"src",
ReplicationConfig {
role: "arn".into(),
rules: vec![rule("disabled", 100, false, None, &[], "dst")],
},
);
assert!(
mgr.match_rule("src", "anything", &[]).is_none(),
"status_enabled=false must not match even at high priority"
);
}
#[test]
fn record_and_lookup_status_round_trip() {
let mgr = ReplicationManager::new();
assert!(mgr.lookup_status("b", "k").is_none());
mgr.record_status("b", "k", ReplicationStatus::Pending);
assert_eq!(
mgr.lookup_status("b", "k"),
Some(ReplicationStatus::Pending)
);
mgr.record_status("b", "k", ReplicationStatus::Completed);
assert_eq!(
mgr.lookup_status("b", "k"),
Some(ReplicationStatus::Completed)
);
}
#[test]
fn json_round_trip_preserves_config_and_statuses() {
let mgr = ReplicationManager::new();
mgr.put(
"src",
ReplicationConfig {
role: "arn:aws:iam::000:role/s4".into(),
rules: vec![rule("r1", 7, true, Some("docs/"), &[("env", "prod")], "dst")],
},
);
mgr.record_status("src", "docs/a.pdf", ReplicationStatus::Completed);
let json = mgr.to_json().expect("to_json");
let mgr2 = ReplicationManager::from_json(&json).expect("from_json");
assert_eq!(mgr.get("src"), mgr2.get("src"));
assert_eq!(
mgr2.lookup_status("src", "docs/a.pdf"),
Some(ReplicationStatus::Completed)
);
}
#[test]
fn delete_is_idempotent() {
let mgr = ReplicationManager::new();
mgr.delete("never-existed");
mgr.put(
"b",
ReplicationConfig {
role: "arn".into(),
rules: vec![rule("r1", 1, true, None, &[], "dst")],
},
);
mgr.delete("b");
assert!(mgr.get("b").is_none());
}
#[test]
fn put_replaces_previous_config() {
let mgr = ReplicationManager::new();
mgr.put(
"b",
ReplicationConfig {
role: "arn".into(),
rules: vec![rule("old", 1, true, None, &[], "dst-old")],
},
);
mgr.put(
"b",
ReplicationConfig {
role: "arn".into(),
rules: vec![rule("new", 1, true, None, &[], "dst-new")],
},
);
let cfg = mgr.get("b").expect("config");
assert_eq!(cfg.rules.len(), 1);
assert_eq!(cfg.rules[0].id, "new");
assert_eq!(cfg.rules[0].destination_bucket, "dst-new");
}
#[tokio::test]
async fn replicate_object_happy_path_marks_completed() {
type Captured = Vec<(String, String, bytes::Bytes, Option<HashMap<String, String>>)>;
let mgr = Arc::new(ReplicationManager::new());
let captured: Arc<Mutex<Captured>> = Arc::new(Mutex::new(Vec::new()));
let captured_cl = Arc::clone(&captured);
let do_put = move |dest: String,
key: String,
body: bytes::Bytes,
meta: Option<HashMap<String, String>>| {
let captured = Arc::clone(&captured_cl);
async move {
captured.lock().unwrap().push((dest, key, body, meta));
Ok::<(), String>(())
}
};
replicate_object(
rule("r1", 1, true, None, &[], "dst"),
"src".into(),
"obj.bin".into(),
bytes::Bytes::from_static(b"hello"),
Some(HashMap::from([("content-type".into(), "text/plain".into())])),
do_put,
Arc::clone(&mgr),
)
.await;
assert_eq!(
mgr.lookup_status("src", "obj.bin"),
Some(ReplicationStatus::Completed)
);
assert_eq!(mgr.dropped_total.load(Ordering::Relaxed), 0);
let cap = captured.lock().unwrap();
assert_eq!(cap.len(), 1, "do_put must run exactly once on success");
assert_eq!(cap[0].0, "dst");
assert_eq!(cap[0].1, "obj.bin");
assert_eq!(cap[0].2.as_ref(), b"hello");
let meta = cap[0].3.as_ref().expect("metadata stamped");
assert_eq!(
meta.get("x-amz-replication-status").map(String::as_str),
Some("REPLICA"),
"destination meta must carry the REPLICA stamp"
);
assert_eq!(meta.get("content-type").map(String::as_str), Some("text/plain"));
}
#[tokio::test]
async fn replicate_object_failure_after_retry_budget_marks_failed_and_bumps_drop() {
let mgr = Arc::new(ReplicationManager::new());
let attempts: Arc<Mutex<u32>> = Arc::new(Mutex::new(0));
let attempts_cl = Arc::clone(&attempts);
let do_put = move |_dest: String,
_key: String,
_body: bytes::Bytes,
_meta: Option<HashMap<String, String>>| {
let attempts = Arc::clone(&attempts_cl);
async move {
*attempts.lock().unwrap() += 1;
Err::<(), String>("simulated destination 5xx".into())
}
};
replicate_object(
rule("r-fail", 1, true, None, &[], "dst"),
"src".into(),
"doomed.bin".into(),
bytes::Bytes::from_static(b"x"),
None,
do_put,
Arc::clone(&mgr),
)
.await;
assert_eq!(
*attempts.lock().unwrap(),
RETRY_ATTEMPTS,
"must retry exactly the configured budget"
);
assert_eq!(
mgr.lookup_status("src", "doomed.bin"),
Some(ReplicationStatus::Failed)
);
assert_eq!(
mgr.dropped_total.load(Ordering::Relaxed),
1,
"drop counter must bump exactly once after retry budget exhausted"
);
}
#[test]
fn replication_status_aws_strings_match_spec() {
assert_eq!(ReplicationStatus::Pending.as_aws_str(), "PENDING");
assert_eq!(ReplicationStatus::Completed.as_aws_str(), "COMPLETED");
assert_eq!(ReplicationStatus::Failed.as_aws_str(), "FAILED");
assert_eq!(ReplicationStatus::Replica.as_aws_str(), "REPLICA");
}
}