use std::collections::HashMap;
use std::sync::RwLock;
use serde::{Deserialize, Serialize};
/// Maximum number of tags allowed on a single object or bucket.
/// These limits match S3's documented tagging limits (10 tags per resource).
pub const MAX_TAGS_PER_OBJECT: usize = 10;
/// Maximum tag key length, measured in bytes (not characters).
pub const MAX_TAG_KEY_BYTES: usize = 128;
/// Maximum tag value length, measured in bytes (not characters).
pub const MAX_TAG_VALUE_BYTES: usize = 256;
/// An ordered list of `(key, value)` tag pairs.
///
/// Insertion order is preserved. `get` returns the value of the *last*
/// matching key, and `validate` enforces the count/length/uniqueness limits
/// defined by the `MAX_*` constants above. The inner `Vec` is public, so a
/// `TagSet` built directly (rather than via `from_pairs`) may be unvalidated.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct TagSet(pub Vec<(String, String)>);
impl TagSet {
    /// Creates an empty tag set.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Builds a tag set from `pairs`, rejecting any set that violates the
    /// count, byte-length, or key-uniqueness limits.
    pub fn from_pairs(pairs: Vec<(String, String)>) -> Result<Self, TagError> {
        let set = Self(pairs);
        set.validate().map(|()| set)
    }

    /// Returns the value for `key`, taking the *last* matching pair when the
    /// (unvalidated) set contains duplicates.
    #[must_use]
    pub fn get(&self, key: &str) -> Option<&str> {
        let mut hit = None;
        for (k, v) in &self.0 {
            if k == key {
                hit = Some(v.as_str());
            }
        }
        hit
    }

    /// Iterates over the `(key, value)` pairs in insertion order.
    pub fn iter(&self) -> impl Iterator<Item = &(String, String)> {
        self.0.iter()
    }

    /// Number of tag pairs.
    #[must_use]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// True when the set has no pairs.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Checks tag count, key/value byte lengths, and key uniqueness,
    /// returning the first violation found (in that per-pair order).
    pub fn validate(&self) -> Result<(), TagError> {
        let count = self.0.len();
        if count > MAX_TAGS_PER_OBJECT {
            return Err(TagError::TooManyTags {
                got: count,
                max: MAX_TAGS_PER_OBJECT,
            });
        }
        let mut seen = std::collections::HashSet::with_capacity(count);
        for (key, value) in &self.0 {
            if key.is_empty() {
                return Err(TagError::EmptyKey);
            }
            if key.len() > MAX_TAG_KEY_BYTES {
                return Err(TagError::KeyTooLong {
                    len: key.len(),
                    max: MAX_TAG_KEY_BYTES,
                });
            }
            if value.len() > MAX_TAG_VALUE_BYTES {
                return Err(TagError::ValueTooLong {
                    key: key.clone(),
                    len: value.len(),
                    max: MAX_TAG_VALUE_BYTES,
                });
            }
            // `insert` returns false when the key was already present.
            if !seen.insert(key.as_str()) {
                return Err(TagError::DuplicateKey { key: key.clone() });
            }
        }
        Ok(())
    }
}
/// Errors produced by tag validation and tagging-header parsing.
///
/// The `#[error]` format strings are the user-visible messages; keep them
/// stable, since callers/clients may surface them directly.
#[derive(Debug, thiserror::Error)]
pub enum TagError {
    /// More than `MAX_TAGS_PER_OBJECT` pairs in one set.
    #[error("too many tags: {got} (max {max} per object/bucket)")]
    TooManyTags { got: usize, max: usize },
    /// A tag with a zero-length key.
    #[error("tag key must not be empty")]
    EmptyKey,
    /// Key exceeds `MAX_TAG_KEY_BYTES` (byte length, not characters).
    #[error("tag key too long: {len} bytes (max {max})")]
    KeyTooLong { len: usize, max: usize },
    /// Value exceeds `MAX_TAG_VALUE_BYTES` for the given key.
    #[error("tag value too long: key {key:?} value is {len} bytes (max {max})")]
    ValueTooLong { key: String, len: usize, max: usize },
    /// The same key appears more than once in a set.
    #[error("duplicate tag key: {key:?}")]
    DuplicateKey { key: String },
    /// The `x-amz-tagging`-style header could not be URL-decoded.
    #[error("invalid tag header (URL-encoded): {0}")]
    InvalidHeader(String),
}
/// Serializable snapshot of a `TagManager`'s full state (see `to_json` /
/// `from_json`).
///
/// Object entries are a `Vec` of `((bucket, key), tags)` pairs rather than a
/// map, because JSON object keys must be strings and the tuple key cannot
/// serialize as a map key.
#[derive(Debug, Default, Serialize, Deserialize)]
struct TagSnapshot {
    objects: Vec<((String, String), TagSet)>,
    buckets: HashMap<String, TagSet>,
}
/// Thread-safe in-memory store of per-object and per-bucket tag sets.
///
/// Object tags are keyed by `(bucket, key)`; bucket tags by bucket name.
/// All lock access goes through `crate::lock_recovery`, which (per the
/// `tagging_to_json_after_panic_recovers_via_poison` test) recovers from a
/// poisoned lock instead of propagating the panic.
#[derive(Debug, Default)]
pub struct TagManager {
    objects: RwLock<HashMap<(String, String), TagSet>>,
    buckets: RwLock<HashMap<String, TagSet>>,
}
impl TagManager {
#[must_use]
pub fn new() -> Self {
Self::default()
}
pub fn put_object_tags(&self, bucket: &str, key: &str, tags: TagSet) {
crate::lock_recovery::recover_write(&self.objects, "tagging.objects")
.insert((bucket.to_owned(), key.to_owned()), tags);
}
#[must_use]
pub fn get_object_tags(&self, bucket: &str, key: &str) -> Option<TagSet> {
crate::lock_recovery::recover_read(&self.objects, "tagging.objects")
.get(&(bucket.to_owned(), key.to_owned()))
.cloned()
}
pub fn delete_object_tags(&self, bucket: &str, key: &str) {
crate::lock_recovery::recover_write(&self.objects, "tagging.objects")
.remove(&(bucket.to_owned(), key.to_owned()));
}
pub fn put_bucket_tags(&self, bucket: &str, tags: TagSet) {
crate::lock_recovery::recover_write(&self.buckets, "tagging.buckets")
.insert(bucket.to_owned(), tags);
}
#[must_use]
pub fn get_bucket_tags(&self, bucket: &str) -> Option<TagSet> {
crate::lock_recovery::recover_read(&self.buckets, "tagging.buckets")
.get(bucket)
.cloned()
}
pub fn delete_bucket_tags(&self, bucket: &str) {
crate::lock_recovery::recover_write(&self.buckets, "tagging.buckets").remove(bucket);
}
pub fn to_json(&self) -> Result<String, serde_json::Error> {
let objects: Vec<((String, String), TagSet)> =
crate::lock_recovery::recover_read(&self.objects, "tagging.objects")
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let buckets = crate::lock_recovery::recover_read(&self.buckets, "tagging.buckets").clone();
let snap = TagSnapshot { objects, buckets };
serde_json::to_string(&snap)
}
pub fn from_json(s: &str) -> Result<Self, serde_json::Error> {
let snap: TagSnapshot = serde_json::from_str(s)?;
let mut objects = HashMap::with_capacity(snap.objects.len());
for (k, v) in snap.objects {
objects.insert(k, v);
}
Ok(Self {
objects: RwLock::new(objects),
buckets: RwLock::new(snap.buckets),
})
}
}
/// Parses an S3-style tagging header (`k1=v1&k2=v2`, URL-encoded) into a
/// validated `TagSet`.
///
/// A blank (or all-whitespace) header yields an empty set. A segment with no
/// `=` becomes a key with an empty value, and empty segments (`a=b&&c=d`)
/// are skipped. The resulting pairs go through `TagSet::from_pairs`, so all
/// count/length/uniqueness limits apply to the *decoded* keys and values.
pub fn parse_tagging_header(header: &str) -> Result<TagSet, TagError> {
    let body = header.trim();
    if body.is_empty() {
        return Ok(TagSet::new());
    }
    let mut pairs: Vec<(String, String)> = Vec::new();
    for segment in body.split('&').filter(|seg| !seg.is_empty()) {
        // No '=' means "bare key" with an empty value.
        let (raw_k, raw_v) = segment.split_once('=').unwrap_or((segment, ""));
        let decoded_key = url_decode(raw_k)
            .map_err(|e| TagError::InvalidHeader(format!("key {raw_k:?}: {e}")))?;
        let decoded_val = url_decode(raw_v)
            .map_err(|e| TagError::InvalidHeader(format!("value {raw_v:?}: {e}")))?;
        pairs.push((decoded_key, decoded_val));
    }
    TagSet::from_pairs(pairs)
}
/// Renders `tags` as a URL-encoded `k=v&k=v` header string (inverse of
/// `parse_tagging_header` for validated sets).
#[must_use]
pub fn render_tagging_header(tags: &TagSet) -> String {
    let mut rendered = String::new();
    let mut first = true;
    for (key, value) in tags.iter() {
        if !first {
            rendered.push('&');
        }
        first = false;
        url_encode_to(&mut rendered, key);
        rendered.push('=');
        url_encode_to(&mut rendered, value);
    }
    rendered
}
/// Decodes a URL-encoded component: `+` becomes a space, `%XY` becomes the
/// byte `0xXY`, everything else passes through. The decoded bytes must form
/// valid UTF-8.
///
/// Errors are human-readable strings naming the byte offset of the problem.
fn url_decode(s: &str) -> Result<String, String> {
    let src = s.as_bytes();
    let mut decoded = Vec::with_capacity(src.len());
    let mut pos = 0;
    while pos < src.len() {
        match src[pos] {
            b'+' => {
                decoded.push(b' ');
                pos += 1;
            }
            b'%' => {
                // A '%' must be followed by exactly two hex digits.
                if pos + 2 >= src.len() {
                    return Err(format!("truncated %-escape at byte {pos}"));
                }
                let hi = (src[pos + 1] as char)
                    .to_digit(16)
                    .ok_or_else(|| format!("non-hex byte after % at {}", pos + 1))?;
                let lo = (src[pos + 2] as char)
                    .to_digit(16)
                    .ok_or_else(|| format!("non-hex byte after % at {}", pos + 2))?;
                // hi/lo are 0..=15, so the combined value fits in a u8.
                decoded.push(((hi << 4) | lo) as u8);
                pos += 3;
            }
            other => {
                decoded.push(other);
                pos += 1;
            }
        }
    }
    String::from_utf8(decoded).map_err(|e| format!("invalid UTF-8: {e}"))
}
/// Maps an ASCII hex digit (either case) to its numeric value `0..=15`;
/// returns `None` for any other byte.
fn hex_digit(b: u8) -> Option<u8> {
    if b.is_ascii_digit() {
        Some(b - b'0')
    } else if (b'a'..=b'f').contains(&b) {
        Some(b - b'a' + 10)
    } else if (b'A'..=b'F').contains(&b) {
        Some(b - b'A' + 10)
    } else {
        None
    }
}
/// Percent-encodes `s` into `out`, leaving RFC 3986 unreserved characters
/// (ASCII alphanumerics and `-`, `_`, `.`, `~`) as-is and emitting
/// uppercase `%XY` escapes for every other byte.
fn url_encode_to(out: &mut String, s: &str) {
    for &byte in s.as_bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(char::from(byte));
            }
            other => {
                // `other >> 4` is already <= 0x0F for a u8, no mask needed.
                out.push('%');
                out.push(char::from(HEX[usize::from(other >> 4)]));
                out.push(char::from(HEX[usize::from(other & 0x0F)]));
            }
        }
    }
}

/// Uppercase hex digits used by `url_encode_to`.
const HEX: &[u8; 16] = b"0123456789ABCDEF";
#[cfg(test)]
mod tests {
    //! Unit tests covering: TagSet limit enforcement, tagging-header
    //! parse/render round-trips, TagManager CRUD + JSON snapshotting, and
    //! recovery from a poisoned lock.
    use super::*;

    // --- TagSet::from_pairs limit enforcement ---
    // NOTE: in the `matches!` patterns below, `MAX_*` identifiers resolve to
    // the module constants, so they are constant patterns (match the value),
    // not fresh bindings.

    // 11 pairs exceeds MAX_TAGS_PER_OBJECT (10).
    #[test]
    fn from_pairs_too_many_rejected() {
        let pairs: Vec<(String, String)> = (0..11)
            .map(|i| (format!("k{i}"), format!("v{i}")))
            .collect();
        let err = TagSet::from_pairs(pairs).expect_err("must reject 11 pairs");
        assert!(
            matches!(
                err,
                TagError::TooManyTags {
                    got: 11,
                    max: MAX_TAGS_PER_OBJECT
                }
            ),
            "got: {err:?}"
        );
    }

    // 129-byte key exceeds MAX_TAG_KEY_BYTES (128).
    #[test]
    fn from_pairs_long_key_rejected() {
        let pairs = vec![("k".repeat(129), "v".into())];
        let err = TagSet::from_pairs(pairs).expect_err("must reject 129-byte key");
        assert!(
            matches!(
                err,
                TagError::KeyTooLong {
                    len: 129,
                    max: MAX_TAG_KEY_BYTES
                }
            ),
            "got: {err:?}"
        );
    }

    // 257-byte value exceeds MAX_TAG_VALUE_BYTES (256); error names the key.
    #[test]
    fn from_pairs_long_value_rejected() {
        let pairs = vec![("k".into(), "v".repeat(257))];
        let err = TagSet::from_pairs(pairs).expect_err("must reject 257-byte value");
        assert!(
            matches!(
                err,
                TagError::ValueTooLong { ref key, len: 257, max: MAX_TAG_VALUE_BYTES }
                if key == "k"
            ),
            "got: {err:?}"
        );
    }

    // Exactly at every limit (10 tags, 128-byte keys, 256-byte values) passes.
    #[test]
    fn from_pairs_at_limits_accepted() {
        let pairs: Vec<(String, String)> = (0..10)
            .map(|i| {
                let k = format!("k{i}");
                let v = format!("v{i}");
                // `{k:k<128}` pads with fill char 'k' (left-aligned) to
                // exactly 128 bytes; likewise 'v' to 256 bytes.
                let k = format!("{k:k<128}");
                let v = format!("{v:v<256}");
                (k, v)
            })
            .collect();
        for (k, v) in &pairs {
            assert_eq!(k.len(), 128);
            assert_eq!(v.len(), 256);
        }
        let s = TagSet::from_pairs(pairs).expect("at-limit pairs must pass");
        assert_eq!(s.len(), 10);
    }

    // --- parse_tagging_header ---

    #[test]
    fn parse_tagging_header_basic() {
        let s = parse_tagging_header("K1=V1&K2=V2").expect("parse");
        assert_eq!(s.len(), 2);
        assert_eq!(s.get("K1"), Some("V1"));
        assert_eq!(s.get("K2"), Some("V2"));
    }

    // %2F -> '/', %20 -> ' ', '+' -> ' ' (query-string convention).
    #[test]
    fn parse_tagging_header_url_encoded_values() {
        let s = parse_tagging_header("Path=foo%2Fbar&Greet=hello%20world&Plus=a+b").expect("parse");
        assert_eq!(s.get("Path"), Some("foo/bar"));
        assert_eq!(s.get("Greet"), Some("hello world"));
        assert_eq!(s.get("Plus"), Some("a b"));
    }

    // A bare key (no '=') and an explicit "K=" both yield an empty value.
    #[test]
    fn parse_tagging_header_empty_value() {
        let s = parse_tagging_header("Bare").expect("parse");
        assert_eq!(s.get("Bare"), Some(""));
        let s2 = parse_tagging_header("K=").expect("parse");
        assert_eq!(s2.get("K"), Some(""));
    }

    // Blank / whitespace-only headers produce an empty set, not an error.
    #[test]
    fn parse_tagging_header_empty_returns_empty_set() {
        let s = parse_tagging_header("").expect("parse");
        assert!(s.is_empty());
        let s2 = parse_tagging_header(" ").expect("parse");
        assert!(s2.is_empty());
    }

    // "%2" at end of input: '%' without two following hex digits.
    #[test]
    fn parse_tagging_header_truncated_escape_rejected() {
        let err = parse_tagging_header("K=%2").expect_err("truncated");
        assert!(matches!(err, TagError::InvalidHeader(_)));
    }

    // render -> parse must reproduce the original set exactly.
    #[test]
    fn render_tagging_header_round_trip() {
        let original = TagSet::from_pairs(vec![
            ("Project".into(), "Phoenix".into()),
            ("Env".into(), "prod with space".into()),
            ("Path".into(), "data/2026".into()),
        ])
        .expect("ts");
        let rendered = render_tagging_header(&original);
        let parsed = parse_tagging_header(&rendered).expect("parse");
        assert_eq!(parsed, original);
    }

    // --- TagManager CRUD ---

    // Put/get/delete for object tags; second delete is a no-op.
    #[test]
    fn manager_object_put_get_delete() {
        let m = TagManager::new();
        let tags = TagSet::from_pairs(vec![("Owner".into(), "alice".into())]).expect("ts");
        m.put_object_tags("b", "k", tags.clone());
        assert_eq!(m.get_object_tags("b", "k"), Some(tags));
        m.delete_object_tags("b", "k");
        assert!(m.get_object_tags("b", "k").is_none());
        m.delete_object_tags("b", "k");
    }

    #[test]
    fn manager_bucket_put_get_delete() {
        let m = TagManager::new();
        let tags = TagSet::from_pairs(vec![("CostCenter".into(), "42".into())]).expect("ts");
        m.put_bucket_tags("b", tags.clone());
        assert_eq!(m.get_bucket_tags("b"), Some(tags));
        m.delete_bucket_tags("b");
        assert!(m.get_bucket_tags("b").is_none());
    }

    // Object tags and bucket tags for the same bucket name never mix.
    #[test]
    fn manager_object_and_bucket_independent() {
        let m = TagManager::new();
        m.put_object_tags(
            "b",
            "k",
            TagSet::from_pairs(vec![("o".into(), "1".into())]).unwrap(),
        );
        m.put_bucket_tags(
            "b",
            TagSet::from_pairs(vec![("b".into(), "2".into())]).unwrap(),
        );
        assert_eq!(m.get_object_tags("b", "k").unwrap().get("o"), Some("1"));
        assert!(m.get_object_tags("b", "k").unwrap().get("b").is_none());
        assert_eq!(m.get_bucket_tags("b").unwrap().get("b"), Some("2"));
        assert!(m.get_bucket_tags("b").unwrap().get("o").is_none());
    }

    // to_json -> from_json preserves all object and bucket tag sets.
    #[test]
    fn manager_json_snapshot_round_trip() {
        let m = TagManager::new();
        m.put_object_tags(
            "b1",
            "k1",
            TagSet::from_pairs(vec![("Project".into(), "Phoenix".into())]).unwrap(),
        );
        m.put_object_tags(
            "b2",
            "k2",
            TagSet::from_pairs(vec![("Env".into(), "prod".into())]).unwrap(),
        );
        m.put_bucket_tags(
            "b1",
            TagSet::from_pairs(vec![("CostCenter".into(), "42".into())]).unwrap(),
        );
        let json = m.to_json().expect("to_json");
        let m2 = TagManager::from_json(&json).expect("from_json");
        assert_eq!(
            m2.get_object_tags("b1", "k1").unwrap().get("Project"),
            Some("Phoenix")
        );
        assert_eq!(
            m2.get_object_tags("b2", "k2").unwrap().get("Env"),
            Some("prod")
        );
        assert_eq!(
            m2.get_bucket_tags("b1").unwrap().get("CostCenter"),
            Some("42")
        );
    }

    // A directly-constructed (unvalidated) set may hold duplicates; `get`
    // returns the last match.
    #[test]
    fn tag_set_get_last_wins_on_duplicate_keys() {
        let s = TagSet(vec![("K".into(), "A".into()), ("K".into(), "B".into())]);
        assert_eq!(s.get("K"), Some("B"));
    }

    // --- header parsing feeds into TagSet::validate ---

    #[test]
    fn parse_tagging_header_empty_key_rejected() {
        let err = parse_tagging_header("=value").expect_err("empty key");
        assert!(matches!(err, TagError::EmptyKey), "got: {err:?}");
    }

    #[test]
    fn parse_tagging_header_long_key_rejected() {
        let header = format!("{}=v", "k".repeat(129));
        let err = parse_tagging_header(&header).expect_err("129-byte key");
        assert!(
            matches!(
                err,
                TagError::KeyTooLong {
                    len: 129,
                    max: MAX_TAG_KEY_BYTES
                }
            ),
            "got: {err:?}"
        );
    }

    #[test]
    fn parse_tagging_header_long_value_rejected() {
        let header = format!("k={}", "v".repeat(257));
        let err = parse_tagging_header(&header).expect_err("257-byte value");
        assert!(
            matches!(
                err,
                TagError::ValueTooLong { ref key, len: 257, max: MAX_TAG_VALUE_BYTES }
                if key == "k"
            ),
            "got: {err:?}"
        );
    }

    #[test]
    fn parse_tagging_header_duplicate_key_rejected() {
        let err = parse_tagging_header("K=A&K=B").expect_err("dup key");
        assert!(
            matches!(err, TagError::DuplicateKey { ref key } if key == "K"),
            "got: {err:?}"
        );
    }

    #[test]
    fn parse_tagging_header_too_many_tags_rejected() {
        let header: String = (0..11)
            .map(|i| format!("k{i}=v{i}"))
            .collect::<Vec<_>>()
            .join("&");
        let err = parse_tagging_header(&header).expect_err("11 tags");
        assert!(
            matches!(
                err,
                TagError::TooManyTags {
                    got: 11,
                    max: MAX_TAGS_PER_OBJECT
                }
            ),
            "got: {err:?}"
        );
    }

    // Panicking while holding the write guard poisons the objects lock;
    // to_json must still succeed via the lock_recovery helpers.
    #[test]
    fn tagging_to_json_after_panic_recovers_via_poison() {
        let m = TagManager::new();
        m.put_object_tags(
            "b",
            "k",
            TagSet::from_pairs(vec![("Project".into(), "Phoenix".into())]).expect("valid"),
        );
        let m = std::sync::Arc::new(m);
        let m_cl = std::sync::Arc::clone(&m);
        // AssertUnwindSafe: we intentionally observe the manager after the
        // panic to test poison recovery.
        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            let mut g = m_cl.objects.write().expect("clean lock");
            g.entry(("b".into(), "k2".into())).or_default();
            panic!("force-poison");
        }));
        assert!(
            m.objects.is_poisoned(),
            "write panic must poison objects lock"
        );
        let json = m.to_json().expect("to_json after poison must succeed");
        let m2 = TagManager::from_json(&json).expect("from_json");
        assert!(m2.get_object_tags("b", "k").is_some());
    }
}