Skip to main content

s4_server/
tagging.rs

1//! Object + Bucket tagging (v0.6 #39).
2//!
3//! S3 attaches a `TagSet` (max 10 key/value pairs, key ≤ 128 bytes,
4//! value ≤ 256 bytes — AWS S3 spec) to each object, and another (also
5//! max 10) to each bucket. Tags are surfaced to the IAM policy
6//! evaluator via two condition keys:
7//!
8//! - `s3:ExistingObjectTag/<key>` — the existing tag attached to the
9//!   object the request is targeting (resolved via [`TagManager`] at
10//!   policy evaluation time).
11//! - `s3:RequestObjectTag/<key>` — a tag the caller is supplying as
12//!   part of *this* request, either via the `x-amz-tagging` URL-encoded
13//!   header on `PutObject`, or via the `Tagging` body field on
14//!   `PutObjectTagging`.
15//!
16//! ## scope (v0.6 #39)
17//!
18//! - **In-memory only** with optional JSON snapshot for restart-
19//!   recoverable state — same shape as `versioning.rs` /
20//!   `object_lock.rs`'s `--versioning-state-file` /
21//!   `--object-lock-state-file`.
22//! - **Per-(bucket, key) granularity**, no version-id-aware tag
23//!   attachment (matches the v0.5 #30 object-lock decision; AWS-style
24//!   per-version tags can be layered on top later).
25//! - **No charge / accounting** model — tags are stored, served, and
26//!   evaluated; cost-allocation reports are out of scope.
27//! - **No tag-key character validation** beyond the AWS length limits.
28//!   The wider AWS rule set (allowed character class, no `aws:` prefix
29//!   for user tags, etc.) is deferred — operators get the spec as it
30//!   relates to gating but can store any UTF-8 they like.
31//!
32//! ## scope-out (DO NOT touch — handled by sibling agents)
33//!
34//! - notification dispatch (#35), lifecycle expiration (#37)
35//! - ACL / replication / website / logging
36//! - per-version tag attachment
37
38use std::collections::HashMap;
39use std::sync::RwLock;
40
41use serde::{Deserialize, Serialize};
42
/// AWS S3 max number of tags per object / bucket.
///
/// Enforced by [`TagSet::validate`]; a set with more pairs than this is
/// rejected with [`TagError::TooMany`].
pub const MAX_TAGS_PER_OBJECT: usize = 10;
/// AWS S3 max length (in bytes) of a tag key.
///
/// Compared against `String::len`, i.e. the UTF-8 encoded byte length,
/// not the number of characters.
pub const MAX_TAG_KEY_BYTES: usize = 128;
/// AWS S3 max length (in bytes) of a tag value.
///
/// Compared against `String::len`, i.e. the UTF-8 encoded byte length,
/// not the number of characters.
pub const MAX_TAG_VALUE_BYTES: usize = 256;
49
50/// An ordered tag set. Insertion order is preserved (mirrors the AWS
51/// XML wire format, which is order-significant for the response). For
52/// duplicates on the same key, the *last* pair wins on lookup, matching
53/// AWS S3 behaviour for `x-amz-tagging`.
54#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
55pub struct TagSet(pub Vec<(String, String)>);
56
57impl TagSet {
58    /// Empty tag set.
59    #[must_use]
60    pub fn new() -> Self {
61        Self::default()
62    }
63
64    /// Construct a tag set from `(key, value)` pairs, validating the
65    /// AWS S3 limits (max 10, key ≤ 128 B, value ≤ 256 B). Duplicate
66    /// keys are retained in insertion order; lookup picks the last one.
67    pub fn from_pairs(pairs: Vec<(String, String)>) -> Result<Self, TagError> {
68        let s = Self(pairs);
69        s.validate()?;
70        Ok(s)
71    }
72
73    /// Look up the value for `key`. Last-wins on duplicates.
74    #[must_use]
75    pub fn get(&self, key: &str) -> Option<&str> {
76        self.0
77            .iter()
78            .rev()
79            .find(|(k, _)| k == key)
80            .map(|(_, v)| v.as_str())
81    }
82
83    /// Iterate the pairs in insertion order.
84    pub fn iter(&self) -> impl Iterator<Item = &(String, String)> {
85        self.0.iter()
86    }
87
88    #[must_use]
89    pub fn len(&self) -> usize {
90        self.0.len()
91    }
92
93    #[must_use]
94    pub fn is_empty(&self) -> bool {
95        self.0.is_empty()
96    }
97
98    /// Enforce the AWS S3 size limits (count ≤ 10, key ≤ 128 B,
99    /// value ≤ 256 B). Called by [`Self::from_pairs`]; can also be
100    /// called directly when constructing a `TagSet` from external
101    /// input that wasn't validated yet.
102    pub fn validate(&self) -> Result<(), TagError> {
103        if self.0.len() > MAX_TAGS_PER_OBJECT {
104            return Err(TagError::TooMany { got: self.0.len() });
105        }
106        for (k, v) in &self.0 {
107            if k.len() > MAX_TAG_KEY_BYTES {
108                return Err(TagError::KeyTooLong { len: k.len() });
109            }
110            if v.len() > MAX_TAG_VALUE_BYTES {
111                return Err(TagError::ValueTooLong { len: v.len() });
112            }
113        }
114        Ok(())
115    }
116}
117
118/// Error class for tag-set construction / parse.
119#[derive(Debug, thiserror::Error)]
120pub enum TagError {
121    #[error("too many tags: {got} (max {})", MAX_TAGS_PER_OBJECT)]
122    TooMany { got: usize },
123    #[error("tag key too long: {len} bytes (max {})", MAX_TAG_KEY_BYTES)]
124    KeyTooLong { len: usize },
125    #[error("tag value too long: {len} bytes (max {})", MAX_TAG_VALUE_BYTES)]
126    ValueTooLong { len: usize },
127    #[error("invalid tag header (URL-encoded): {0}")]
128    InvalidHeader(String),
129}
130
/// JSON snapshot wrapper. Tuple keys can't roundtrip through
/// `HashMap` JSON, so the object map is flattened to a `Vec`.
///
/// This is the on-disk shape written by `TagManager::to_json` and read
/// back by `TagManager::from_json`; the field names below are part of
/// that serialized format.
#[derive(Debug, Default, Serialize, Deserialize)]
struct TagSnapshot {
    // One entry per tagged object: `((bucket, key), tags)`.
    objects: Vec<((String, String), TagSet)>,
    // Bucket-level tags, keyed by bucket name.
    buckets: HashMap<String, TagSet>,
}
138
139/// Owns the per-(bucket, key) and per-bucket tag state. All operations
140/// take the inner `RwLock`; cloning a manager is intentionally not
141/// supported — share via `Arc<TagManager>`.
142#[derive(Debug, Default)]
143pub struct TagManager {
144    /// `(bucket, key) → tags`
145    objects: RwLock<HashMap<(String, String), TagSet>>,
146    /// `bucket → tags`
147    buckets: RwLock<HashMap<String, TagSet>>,
148}
149
150impl TagManager {
151    /// Empty manager.
152    #[must_use]
153    pub fn new() -> Self {
154        Self::default()
155    }
156
157    /// Replace (or create) the object-level tag set. AWS PutObjectTagging
158    /// is a full-replace operation (no merge), so we mirror that.
159    pub fn put_object_tags(&self, bucket: &str, key: &str, tags: TagSet) {
160        self.objects
161            .write()
162            .expect("tagging objects RwLock poisoned")
163            .insert((bucket.to_owned(), key.to_owned()), tags);
164    }
165
166    /// Borrow-clone the object-level tag set. `None` when no tags have
167    /// been set for `(bucket, key)`.
168    #[must_use]
169    pub fn get_object_tags(&self, bucket: &str, key: &str) -> Option<TagSet> {
170        self.objects
171            .read()
172            .expect("tagging objects RwLock poisoned")
173            .get(&(bucket.to_owned(), key.to_owned()))
174            .cloned()
175    }
176
177    /// Drop the object-level tag set for `(bucket, key)` (idempotent —
178    /// missing entry is a no-op, matching AWS DeleteObjectTagging).
179    pub fn delete_object_tags(&self, bucket: &str, key: &str) {
180        self.objects
181            .write()
182            .expect("tagging objects RwLock poisoned")
183            .remove(&(bucket.to_owned(), key.to_owned()));
184    }
185
186    /// Replace (or create) the bucket-level tag set.
187    pub fn put_bucket_tags(&self, bucket: &str, tags: TagSet) {
188        self.buckets
189            .write()
190            .expect("tagging buckets RwLock poisoned")
191            .insert(bucket.to_owned(), tags);
192    }
193
194    /// Borrow-clone the bucket-level tag set.
195    #[must_use]
196    pub fn get_bucket_tags(&self, bucket: &str) -> Option<TagSet> {
197        self.buckets
198            .read()
199            .expect("tagging buckets RwLock poisoned")
200            .get(bucket)
201            .cloned()
202    }
203
204    /// Drop the bucket-level tag set (idempotent).
205    pub fn delete_bucket_tags(&self, bucket: &str) {
206        self.buckets
207            .write()
208            .expect("tagging buckets RwLock poisoned")
209            .remove(bucket);
210    }
211
212    /// JSON snapshot for restart-recoverable state. Pair with
213    /// [`Self::from_json`].
214    pub fn to_json(&self) -> Result<String, serde_json::Error> {
215        let objects: Vec<((String, String), TagSet)> = self
216            .objects
217            .read()
218            .expect("tagging objects RwLock poisoned")
219            .iter()
220            .map(|(k, v)| (k.clone(), v.clone()))
221            .collect();
222        let buckets = self
223            .buckets
224            .read()
225            .expect("tagging buckets RwLock poisoned")
226            .clone();
227        let snap = TagSnapshot { objects, buckets };
228        serde_json::to_string(&snap)
229    }
230
231    /// Restore from a JSON snapshot produced by [`Self::to_json`].
232    pub fn from_json(s: &str) -> Result<Self, serde_json::Error> {
233        let snap: TagSnapshot = serde_json::from_str(s)?;
234        let mut objects = HashMap::with_capacity(snap.objects.len());
235        for (k, v) in snap.objects {
236            objects.insert(k, v);
237        }
238        Ok(Self {
239            objects: RwLock::new(objects),
240            buckets: RwLock::new(snap.buckets),
241        })
242    }
243}
244
245/// Parse the AWS S3 `x-amz-tagging` request header. The wire format is
246/// a URL-encoded query string (`Project=Phoenix&Env=prod`); each pair
247/// is `key=value` with both halves percent-decoded. An empty header
248/// resolves to an empty `TagSet`. Keys without `=` are treated as
249/// `(key, "")` (matches `serde_urlencoded` / browser form-encode).
250///
251/// The parsed result is validated against the AWS S3 size limits.
252pub fn parse_tagging_header(header: &str) -> Result<TagSet, TagError> {
253    let trimmed = header.trim();
254    if trimmed.is_empty() {
255        return Ok(TagSet::new());
256    }
257    let mut pairs = Vec::new();
258    for part in trimmed.split('&') {
259        if part.is_empty() {
260            continue;
261        }
262        let (raw_k, raw_v) = match part.split_once('=') {
263            Some((k, v)) => (k, v),
264            None => (part, ""),
265        };
266        let k = url_decode(raw_k)
267            .map_err(|e| TagError::InvalidHeader(format!("key {raw_k:?}: {e}")))?;
268        let v = url_decode(raw_v)
269            .map_err(|e| TagError::InvalidHeader(format!("value {raw_v:?}: {e}")))?;
270        pairs.push((k, v));
271    }
272    TagSet::from_pairs(pairs)
273}
274
275/// Render a tag set as an AWS S3 `x-amz-tagging` URL-encoded string,
276/// suitable for the response echo header. Insertion order is
277/// preserved.
278#[must_use]
279pub fn render_tagging_header(tags: &TagSet) -> String {
280    let mut out = String::new();
281    for (i, (k, v)) in tags.iter().enumerate() {
282        if i > 0 {
283            out.push('&');
284        }
285        url_encode_to(&mut out, k);
286        out.push('=');
287        url_encode_to(&mut out, v);
288    }
289    out
290}
291
/// Minimal `application/x-www-form-urlencoded` decoder.
///
/// - `+` becomes a space (the form-encoding convention; RFC 3986
///   percent-encoding by itself does not give `+` that meaning).
/// - `%xx` (two hex digits, either case) becomes the byte `0xXX`.
/// - Every other byte is copied through unchanged.
///
/// # Errors
///
/// Returns a descriptive message when a `%` is not followed by two
/// more bytes, when either of those bytes is not a hex digit, or when
/// the decoded byte sequence is not valid UTF-8.
fn url_decode(s: &str) -> Result<String, String> {
    let src = s.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(src.len());
    let mut pos = 0;
    while let Some(&byte) = src.get(pos) {
        if byte != b'%' {
            decoded.push(if byte == b'+' { b' ' } else { byte });
            pos += 1;
            continue;
        }
        // A complete escape needs the two bytes that follow the `%`.
        let (first, second) = match (src.get(pos + 1), src.get(pos + 2)) {
            (Some(&first), Some(&second)) => (first, second),
            _ => return Err(format!("truncated %-escape at byte {pos}")),
        };
        let hi = hex_digit(first)
            .ok_or_else(|| format!("non-hex byte after % at {}", pos + 1))?;
        let lo = hex_digit(second)
            .ok_or_else(|| format!("non-hex byte after % at {}", pos + 2))?;
        decoded.push((hi << 4) | lo);
        pos += 3;
    }
    String::from_utf8(decoded).map_err(|e| format!("invalid UTF-8: {e}"))
}

/// Numeric value (0–15) of an ASCII hex digit in either case, or
/// `None` for any other byte.
fn hex_digit(b: u8) -> Option<u8> {
    // `to_digit(16)` only ever yields 0..=15, so narrowing to `u8` is
    // lossless.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
335
/// Percent-encode `s` and append the result to `out`.
///
/// Bytes in the RFC 3986 unreserved set (`A-Za-z0-9-_.~`) are copied
/// verbatim; every other byte of the UTF-8 encoding is written as
/// `%XX` with upper-case hex digits. Existing content of `out` is left
/// untouched.
fn url_encode_to(out: &mut String, s: &str) {
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(char::from(byte));
            }
            _ => {
                let high = usize::from(byte >> 4);
                let low = usize::from(byte & 0x0F);
                out.push('%');
                out.push(char::from(HEX[high]));
                out.push(char::from(HEX[low]));
            }
        }
    }
}

/// Upper-case hex digits, indexed by nibble value.
const HEX: &[u8; 16] = b"0123456789ABCDEF";
358
/// Unit tests for tag-set validation, the `x-amz-tagging` header
/// codec, and `TagManager` storage / snapshot behaviour.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn from_pairs_too_many_rejected() {
        let pairs: Vec<(String, String)> = (0..11)
            .map(|i| (format!("k{i}"), format!("v{i}")))
            .collect();
        let err = TagSet::from_pairs(pairs).expect_err("must reject 11 pairs");
        assert!(matches!(err, TagError::TooMany { got: 11 }));
    }

    #[test]
    fn from_pairs_long_key_rejected() {
        let pairs = vec![("k".repeat(129), "v".into())];
        let err = TagSet::from_pairs(pairs).expect_err("must reject 129-byte key");
        assert!(matches!(err, TagError::KeyTooLong { len: 129 }));
    }

    #[test]
    fn from_pairs_long_value_rejected() {
        let pairs = vec![("k".into(), "v".repeat(257))];
        let err = TagSet::from_pairs(pairs).expect_err("must reject 257-byte value");
        assert!(matches!(err, TagError::ValueTooLong { len: 257 }));
    }

    #[test]
    fn from_pairs_at_limits_accepted() {
        // Exactly 10 tags, key = 128 bytes, value = 256 bytes — at the
        // boundary, must be accepted.
        let pairs: Vec<(String, String)> = (0..10)
            .map(|i| {
                let k = format!("k{i}");
                let v = format!("v{i}");
                let k = format!("{k:k<128}");
                let v = format!("{v:v<256}");
                (k, v)
            })
            .collect();
        // Sanity: lengths are at the limit.
        for (k, v) in &pairs {
            assert_eq!(k.len(), 128);
            assert_eq!(v.len(), 256);
        }
        let s = TagSet::from_pairs(pairs).expect("at-limit pairs must pass");
        assert_eq!(s.len(), 10);
    }

    #[test]
    fn parse_tagging_header_basic() {
        let s = parse_tagging_header("K1=V1&K2=V2").expect("parse");
        assert_eq!(s.len(), 2);
        assert_eq!(s.get("K1"), Some("V1"));
        assert_eq!(s.get("K2"), Some("V2"));
    }

    #[test]
    fn parse_tagging_header_url_encoded_values() {
        // `%20` (space), `%2F` (slash), and `+` (form-style space).
        let s = parse_tagging_header("Path=foo%2Fbar&Greet=hello%20world&Plus=a+b")
            .expect("parse");
        assert_eq!(s.get("Path"), Some("foo/bar"));
        assert_eq!(s.get("Greet"), Some("hello world"));
        assert_eq!(s.get("Plus"), Some("a b"));
    }

    #[test]
    fn parse_tagging_header_empty_value() {
        let s = parse_tagging_header("Bare").expect("parse");
        assert_eq!(s.get("Bare"), Some(""));
        let s2 = parse_tagging_header("K=").expect("parse");
        assert_eq!(s2.get("K"), Some(""));
    }

    #[test]
    fn parse_tagging_header_empty_returns_empty_set() {
        let s = parse_tagging_header("").expect("parse");
        assert!(s.is_empty());
        let s2 = parse_tagging_header("   ").expect("parse");
        assert!(s2.is_empty());
    }

    #[test]
    fn parse_tagging_header_truncated_escape_rejected() {
        let err = parse_tagging_header("K=%2").expect_err("truncated");
        assert!(matches!(err, TagError::InvalidHeader(_)));
        // A lone `%` at the very end is truncated as well.
        let err = parse_tagging_header("K=%").expect_err("truncated");
        assert!(matches!(err, TagError::InvalidHeader(_)));
    }

    #[test]
    fn parse_tagging_header_non_hex_escape_rejected() {
        let err = parse_tagging_header("K=%zz").expect_err("non-hex escape");
        assert!(matches!(err, TagError::InvalidHeader(_)));
        // The key half is decoded too, so a bad escape there must fail.
        let err = parse_tagging_header("%2g=V").expect_err("non-hex escape in key");
        assert!(matches!(err, TagError::InvalidHeader(_)));
    }

    #[test]
    fn parse_tagging_header_invalid_utf8_rejected() {
        // `%FF` decodes to a byte that can never appear in UTF-8.
        let err = parse_tagging_header("K=%FF").expect_err("invalid UTF-8");
        assert!(matches!(err, TagError::InvalidHeader(_)));
    }

    #[test]
    fn parse_tagging_header_multibyte_utf8_decoded() {
        let s = parse_tagging_header("K=%C3%A9").expect("parse");
        assert_eq!(s.get("K"), Some("\u{e9}"));
    }

    #[test]
    fn parse_tagging_header_skips_empty_segments() {
        // Leading, trailing, and doubled `&` produce empty segments,
        // which are ignored rather than turned into empty-key tags.
        let s = parse_tagging_header("&A=1&&B=2&").expect("parse");
        assert_eq!(s.len(), 2);
        assert_eq!(s.get("A"), Some("1"));
        assert_eq!(s.get("B"), Some("2"));
    }

    #[test]
    fn parse_tagging_header_splits_on_first_equals_only() {
        let s = parse_tagging_header("A=b=c").expect("parse");
        assert_eq!(s.len(), 1);
        assert_eq!(s.get("A"), Some("b=c"));
    }

    #[test]
    fn parse_tagging_header_too_many_rejected() {
        let header = (0..11)
            .map(|i| format!("k{i}=v"))
            .collect::<Vec<_>>()
            .join("&");
        let err = parse_tagging_header(&header).expect_err("must reject 11 tags");
        assert!(matches!(err, TagError::TooMany { got: 11 }));
    }

    #[test]
    fn render_tagging_header_round_trip() {
        let original = TagSet::from_pairs(vec![
            ("Project".into(), "Phoenix".into()),
            ("Env".into(), "prod with space".into()),
            ("Path".into(), "data/2026".into()),
        ])
        .expect("ts");
        let rendered = render_tagging_header(&original);
        let parsed = parse_tagging_header(&rendered).expect("parse");
        assert_eq!(parsed, original);
    }

    #[test]
    fn render_tagging_header_escapes_delimiters() {
        // `&` and `=` inside a key/value must be escaped so the output
        // stays unambiguous and survives a parse round trip.
        let tags = TagSet::from_pairs(vec![("a&b".into(), "c=d".into())]).expect("ts");
        let rendered = render_tagging_header(&tags);
        assert_eq!(rendered, "a%26b=c%3Dd");
        assert_eq!(parse_tagging_header(&rendered).expect("parse"), tags);
    }

    #[test]
    fn render_tagging_header_empty_set_is_empty_string() {
        assert_eq!(render_tagging_header(&TagSet::new()), "");
    }

    #[test]
    fn manager_object_put_get_delete() {
        let m = TagManager::new();
        let tags =
            TagSet::from_pairs(vec![("Owner".into(), "alice".into())]).expect("ts");
        m.put_object_tags("b", "k", tags.clone());
        assert_eq!(m.get_object_tags("b", "k"), Some(tags));
        m.delete_object_tags("b", "k");
        assert!(m.get_object_tags("b", "k").is_none());
        // Idempotent re-delete.
        m.delete_object_tags("b", "k");
    }

    #[test]
    fn manager_put_object_tags_replaces_existing() {
        // Put is a full replace: tags from the first call must not
        // survive the second.
        let m = TagManager::new();
        m.put_object_tags(
            "b",
            "k",
            TagSet::from_pairs(vec![("Old".into(), "1".into())]).unwrap(),
        );
        m.put_object_tags(
            "b",
            "k",
            TagSet::from_pairs(vec![("New".into(), "2".into())]).unwrap(),
        );
        let stored = m.get_object_tags("b", "k").expect("tags present");
        assert_eq!(stored.len(), 1);
        assert_eq!(stored.get("New"), Some("2"));
        assert!(stored.get("Old").is_none());
    }

    #[test]
    fn manager_bucket_put_get_delete() {
        let m = TagManager::new();
        let tags =
            TagSet::from_pairs(vec![("CostCenter".into(), "42".into())]).expect("ts");
        m.put_bucket_tags("b", tags.clone());
        assert_eq!(m.get_bucket_tags("b"), Some(tags));
        m.delete_bucket_tags("b");
        assert!(m.get_bucket_tags("b").is_none());
    }

    #[test]
    fn manager_object_and_bucket_independent() {
        // Setting an object tag must not pollute the bucket-level map
        // (and vice versa). Regression guard for an early-prototype
        // bug where both maps were keyed by `bucket` only.
        let m = TagManager::new();
        m.put_object_tags(
            "b",
            "k",
            TagSet::from_pairs(vec![("o".into(), "1".into())]).unwrap(),
        );
        m.put_bucket_tags("b", TagSet::from_pairs(vec![("b".into(), "2".into())]).unwrap());
        assert_eq!(m.get_object_tags("b", "k").unwrap().get("o"), Some("1"));
        assert!(m.get_object_tags("b", "k").unwrap().get("b").is_none());
        assert_eq!(m.get_bucket_tags("b").unwrap().get("b"), Some("2"));
        assert!(m.get_bucket_tags("b").unwrap().get("o").is_none());
    }

    #[test]
    fn manager_json_snapshot_round_trip() {
        let m = TagManager::new();
        m.put_object_tags(
            "b1",
            "k1",
            TagSet::from_pairs(vec![("Project".into(), "Phoenix".into())]).unwrap(),
        );
        m.put_object_tags(
            "b2",
            "k2",
            TagSet::from_pairs(vec![("Env".into(), "prod".into())]).unwrap(),
        );
        m.put_bucket_tags(
            "b1",
            TagSet::from_pairs(vec![("CostCenter".into(), "42".into())]).unwrap(),
        );
        let json = m.to_json().expect("to_json");
        let m2 = TagManager::from_json(&json).expect("from_json");
        assert_eq!(
            m2.get_object_tags("b1", "k1").unwrap().get("Project"),
            Some("Phoenix")
        );
        assert_eq!(
            m2.get_object_tags("b2", "k2").unwrap().get("Env"),
            Some("prod")
        );
        assert_eq!(
            m2.get_bucket_tags("b1").unwrap().get("CostCenter"),
            Some("42")
        );
    }

    #[test]
    fn manager_json_snapshot_round_trip_empty() {
        // An empty manager must serialize and restore cleanly.
        let json = TagManager::new().to_json().expect("to_json");
        let restored = TagManager::from_json(&json).expect("from_json");
        assert!(restored.get_object_tags("b", "k").is_none());
        assert!(restored.get_bucket_tags("b").is_none());
    }

    #[test]
    fn tag_set_get_last_wins_on_duplicate_keys() {
        // AWS x-amz-tagging "K=A&K=B" → look-up returns "B".
        let s = parse_tagging_header("K=A&K=B").expect("parse");
        assert_eq!(s.get("K"), Some("B"));
        // Both pairs are retained; only lookup collapses them.
        assert_eq!(s.len(), 2);
    }
}