ferro-oci-server 1.0.0

OCI Distribution Specification v1.1 server-side primitives — manifest / blob / tag / referrers handlers, chunked uploads, in-memory metadata plane. Backed by ferro-blob-store. Extracted from the Ferro ecosystem.
Documentation
// SPDX-License-Identifier: Apache-2.0
//! Blob-upload session state machine.
//!
//! Spec: OCI Distribution Spec v1.1 §4.3 "Pushing blobs".
//!
//! An upload session is created by `POST /v2/<name>/blobs/uploads/`
//! and identified by a UUID that appears in the `Location` header of
//! the response. Clients can then:
//!
//! - append chunks via `PATCH /v2/<name>/blobs/uploads/<uuid>` with a
//!   `Content-Range: <start>-<end>` header;
//! - finalize via `PUT /v2/<name>/blobs/uploads/<uuid>?digest=<digest>`;
//! - cancel via `DELETE /v2/<name>/blobs/uploads/<uuid>`.
//!
//! This module holds the data-only `UploadState` struct plus helpers
//! for parsing the `Content-Range` header. The actual persistence is
//! delegated to the `RegistryMeta` trait — the in-memory impl is
//! provided in [`crate::registry`].

use std::time::Instant;

use bytes::{Bytes, BytesMut};

/// Upper bound, in bytes, on what one in-flight upload session may
/// buffer before the server refuses further chunks.
///
/// The OCI Distribution Spec sets no hard maximum blob size. However,
/// §4.3 chunked uploads are buffered server-side, so without a ceiling
/// an unauthenticated client could keep opening sessions and appending
/// sub-limit chunks until process memory runs out (a memory-exhaustion
/// `DoS`). Each session is therefore capped at 4 GiB (`4 << 30` bytes):
/// roomy enough for the multi-gigabyte layers real images carry, yet
/// still a concrete limit. When a chunk would push a session past the
/// cap, the handler answers `413 Payload Too Large` with
/// `BLOB_UPLOAD_INVALID` and the session buffer is dropped.
///
/// Follow-up (tracked for the CHANGELOG): the in-memory session store
/// still holds each upload entirely in RAM. Spooling large uploads to
/// disk and expiring idle sessions are larger refactors; this cap is the
/// immediate closure of the unbounded-growth `DoS`.
pub const MAX_UPLOAD_SESSION_BYTES: u64 = 4 << 30;

/// State of an in-flight blob upload.
///
/// Stored per upload UUID. Chunk bytes are accumulated in `buffer`
/// until the final `PUT` arrives and the client-declared digest is
/// compared against a recompute over the buffer.
///
/// This is plain data: appending never checks
/// [`MAX_UPLOAD_SESSION_BYTES`] itself, so the caller is responsible for
/// enforcing the per-session cap before handing a chunk over.
#[derive(Debug, Clone)]
pub struct UploadState {
    /// Repository name the upload belongs to.
    pub name: String,
    /// Upload UUID generated by [`crate::registry::RegistryMeta::start_upload`].
    pub uuid: String,
    /// Accumulated bytes; its length is the session's current offset.
    pub buffer: BytesMut,
    /// Monotonic [`Instant`] of the last activity on this session
    /// (creation or the most recent appended chunk). This is not a
    /// wall-clock time and is only meaningful relative to another
    /// `Instant`. Used by the registry to evict idle sessions after a
    /// TTL (R2-7).
    pub last_activity: Instant,
}

impl UploadState {
    /// Create the state for a fresh upload of repository `name`
    /// identified by `uuid`.
    ///
    /// Nothing is buffered yet, and the activity clock starts at the
    /// moment of construction.
    #[must_use]
    pub fn new(name: impl Into<String>, uuid: impl Into<String>) -> Self {
        let name = name.into();
        let uuid = uuid.into();
        Self {
            name,
            uuid,
            buffer: BytesMut::new(),
            last_activity: Instant::now(),
        }
    }

    /// Number of bytes buffered so far, which is also the offset at
    /// which the next chunk is expected to start.
    #[must_use]
    pub fn offset(&self) -> u64 {
        let buffered = self.buffer.len();
        buffered as u64
    }

    /// Copy `chunk` onto the end of the buffer and return the resulting
    /// offset.
    ///
    /// The last-activity timestamp is reset to "now", so an upload that
    /// keeps making progress is not swept by the idle-session TTL. No
    /// size limit is applied here.
    pub fn append(&mut self, chunk: &Bytes) -> u64 {
        self.buffer.extend_from_slice(chunk.as_ref());
        self.last_activity = Instant::now();
        self.buffer.len() as u64
    }

    /// Whether at least `ttl` has elapsed between the last activity
    /// (creation or append) and `now`.
    ///
    /// A `now` that precedes the last activity counts as zero elapsed
    /// time rather than panicking.
    #[must_use]
    pub fn is_idle_for(&self, now: Instant, ttl: std::time::Duration) -> bool {
        let idle = now.saturating_duration_since(self.last_activity);
        idle >= ttl
    }

    /// Move the accumulated bytes out as an immutable [`Bytes`],
    /// leaving an empty buffer behind (so [`Self::offset`] is `0`
    /// afterwards). The last-activity timestamp is left untouched.
    pub fn take_bytes(&mut self) -> Bytes {
        let drained = std::mem::take(&mut self.buffer);
        drained.freeze()
    }
}

/// Error returned when a `Content-Range` header cannot be parsed.
///
/// Produced by [`ContentRange::parse`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
pub enum ContentRangeParseError {
    /// The string did not match the expected `<start>-<end>` form: the
    /// `-` separator was missing, or one of the bounds could not be
    /// parsed as a `u64` (empty, non-numeric, or too large).
    #[error("malformed Content-Range")]
    Malformed,
    /// Both bounds parsed, but `<start>` was greater than `<end>`.
    #[error("reversed range (start > end)")]
    Reversed,
}

/// Parsed `Content-Range: <start>-<end>` header.
///
/// Both bounds are inclusive, so `N-N` denotes exactly one byte.
/// Values returned by [`ContentRange::parse`] always satisfy
/// `start <= end`; because the fields are public, a value built by hand
/// is not guaranteed to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ContentRange {
    /// Inclusive start byte offset.
    pub start: u64,
    /// Inclusive end byte offset.
    pub end: u64,
}

impl ContentRange {
    /// Parse a `Content-Range` header value.
    ///
    /// Distribution Spec v1.1 §4.3 defines the bare `<start>-<end>` form
    /// (unlike RFC 7233 it has no `bytes ` prefix and no total-length
    /// suffix). For interoperability this parser is deliberately more
    /// lenient about the framing and additionally accepts:
    ///
    /// - an optional `bytes ` prefix, e.g. `bytes 100-199`;
    /// - an optional `/<total>` suffix, e.g. `bytes 0-9/100`; everything
    ///   after the first `/` is discarded without being validated;
    /// - whitespace around the whole value and around each bound.
    ///
    /// The bounds themselves are strict: each must consist solely of
    /// ASCII digits, so signed spellings such as `+1-+5` are rejected.
    ///
    /// # Errors
    ///
    /// - [`ContentRangeParseError::Malformed`] when the `-` separator is
    ///   missing or a bound is empty, contains a non-digit, or does not
    ///   fit in a `u64`.
    /// - [`ContentRangeParseError::Reversed`] when both bounds parse but
    ///   `start > end`.
    pub fn parse(value: &str) -> Result<Self, ContentRangeParseError> {
        let value = value.trim();
        // Accept both the `bytes N-M` (RFC 7233) and the bare `N-M`
        // forms so clients that serialize either one interoperate.
        let payload = value.strip_prefix("bytes ").unwrap_or(value);
        // Drop an RFC 7233 style `/<total>` suffix if one is present.
        let payload = payload
            .split_once('/')
            .map_or(payload, |(range, _total)| range);
        let (start, end) = payload
            .split_once('-')
            .ok_or(ContentRangeParseError::Malformed)?;
        let start = Self::parse_bound(start)?;
        let end = Self::parse_bound(end)?;
        if start > end {
            return Err(ContentRangeParseError::Reversed);
        }
        Ok(Self { start, end })
    }

    /// Parse one bound of the range: surrounding whitespace is ignored,
    /// the remainder must be a non-empty run of ASCII digits that fits
    /// in a `u64`.
    fn parse_bound(raw: &str) -> Result<u64, ContentRangeParseError> {
        let digits = raw.trim();
        // `u64::from_str` tolerates a leading `+`, which the spec's
        // `^[0-9]+-[0-9]+$` grammar does not allow; screen the characters
        // first. An empty string passes this check but is then rejected
        // by `parse` below.
        if !digits.bytes().all(|b| b.is_ascii_digit()) {
            return Err(ContentRangeParseError::Malformed);
        }
        digits
            .parse()
            .map_err(|_| ContentRangeParseError::Malformed)
    }

    /// Inclusive byte length, or `None` when it cannot be represented.
    ///
    /// The inclusive length is `end - start + 1`. For the degenerate
    /// range `0-u64::MAX` this is `u64::MAX + 1`, which overflows: in a
    /// debug build the naive `end - start + 1` panics, and in release it
    /// wraps to `0`, letting an empty `PATCH` body claim a full-range
    /// span (`0` bytes "==" a `0`-length body). We compute with
    /// `checked_*` so callers can reject the overflowing range as
    /// `BLOB_UPLOAD_INVALID` rather than crash or mis-validate.
    ///
    /// `None` is also returned for a reversed range (`start > end`).
    /// [`Self::parse`] never produces one, but the public fields allow
    /// constructing it by hand.
    #[must_use]
    pub const fn checked_length(self) -> Option<u64> {
        match self.end.checked_sub(self.start) {
            Some(span) => span.checked_add(1),
            None => None,
        }
    }

    /// Inclusive byte length.
    ///
    /// Saturates at `u64::MAX` whenever [`Self::checked_length`] would
    /// return `None`: the `0-u64::MAX` overflow edge, or a hand-built
    /// reversed range. Prefer [`Self::checked_length`] when that
    /// condition must be surfaced as an error; this convenience accessor
    /// never panics and never wraps to `0`.
    #[must_use]
    pub const fn length(self) -> u64 {
        match self.checked_length() {
            Some(len) => len,
            None => u64::MAX,
        }
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for the upload session state and the `Content-Range`
    //! parser. Rejection tests pin the exact error variant so a change
    //! in *why* an input is rejected is caught, not just *whether*.

    use super::{ContentRange, ContentRangeParseError, UploadState};
    use bytes::Bytes;
    use std::time::{Duration, Instant};

    #[test]
    fn append_updates_offset() {
        let mut s = UploadState::new("lib/alpine", "abc");
        assert_eq!(s.offset(), 0);
        let n1 = s.append(&Bytes::from_static(b"hello"));
        assert_eq!(n1, 5);
        let n2 = s.append(&Bytes::from_static(b"!"));
        assert_eq!(n2, 6);
        assert_eq!(s.offset(), 6, "offset() agrees with append()'s return");
    }

    #[test]
    fn append_refreshes_last_activity() {
        let mut s = UploadState::new("lib/alpine", "abc");
        let created = s.last_activity;
        s.append(&Bytes::from_static(b"x"));
        // `Instant` is monotonic, so the refreshed stamp can never be
        // earlier than the creation stamp.
        assert!(s.last_activity >= created);
    }

    #[test]
    fn take_bytes_returns_everything_and_resets() {
        let mut s = UploadState::new("lib/alpine", "abc");
        s.append(&Bytes::from_static(b"hello"));
        let out = s.take_bytes();
        assert_eq!(&out[..], b"hello");
        assert_eq!(s.offset(), 0);
    }

    #[test]
    fn is_idle_for_compares_elapsed_time_against_ttl() {
        let s = UploadState::new("lib/alpine", "abc");
        let later = s.last_activity + Duration::from_secs(10);
        assert!(s.is_idle_for(later, Duration::from_secs(5)));
        assert!(!s.is_idle_for(later, Duration::from_secs(60)));
        // Boundary: the comparison is `>=`, so zero elapsed time meets a
        // zero TTL but not a non-zero one.
        assert!(s.is_idle_for(s.last_activity, Duration::ZERO));
        assert!(!s.is_idle_for(s.last_activity, Duration::from_nanos(1)));
    }

    #[test]
    fn is_idle_for_saturates_when_now_precedes_last_activity() {
        let earlier = Instant::now();
        let s = UploadState::new("lib/alpine", "abc");
        // `earlier` is at or before `last_activity`; the elapsed time
        // saturates to zero instead of panicking.
        assert!(!s.is_idle_for(earlier, Duration::from_nanos(1)));
        assert!(s.is_idle_for(earlier, Duration::ZERO));
    }

    #[test]
    fn content_range_parse_bare_form() {
        let r = ContentRange::parse("0-1023").expect("parse");
        assert_eq!(
            r,
            ContentRange {
                start: 0,
                end: 1023
            }
        );
        assert_eq!(r.length(), 1024);
    }

    #[test]
    fn content_range_parse_bytes_prefix() {
        let r = ContentRange::parse("bytes 100-199").expect("parse");
        assert_eq!(
            r,
            ContentRange {
                start: 100,
                end: 199
            }
        );
    }

    #[test]
    fn content_range_parse_with_total() {
        let r = ContentRange::parse("bytes 0-9/100").expect("parse");
        assert_eq!(r, ContentRange { start: 0, end: 9 });
    }

    #[test]
    fn content_range_rejects_reversed() {
        assert_eq!(
            ContentRange::parse("10-5"),
            Err(ContentRangeParseError::Reversed)
        );
    }

    #[test]
    fn content_range_rejects_garbage() {
        assert_eq!(
            ContentRange::parse("not-a-range"),
            Err(ContentRangeParseError::Malformed)
        );
        assert_eq!(
            ContentRange::parse(""),
            Err(ContentRangeParseError::Malformed)
        );
        // No `-` separator at all.
        assert_eq!(
            ContentRange::parse("5"),
            Err(ContentRangeParseError::Malformed)
        );
        // `u64::MAX + 1` does not fit in the bound type.
        assert_eq!(
            ContentRange::parse("0-18446744073709551616"),
            Err(ContentRangeParseError::Malformed)
        );
    }

    #[test]
    fn checked_length_handles_full_u64_range_without_overflow() {
        // R2-2: `0-u64::MAX` would overflow a naive `end - start + 1`
        // (panic in debug, wrap to 0 in release). `checked_length` must
        // return `None` (the caller rejects), and the non-panicking
        // `length` convenience accessor must saturate rather than wrap.
        let r = ContentRange::parse(&format!("0-{}", u64::MAX)).expect("parse full range");
        assert_eq!(r.checked_length(), None, "full-u64 span has no exact length");
        assert_eq!(r.length(), u64::MAX, "length() saturates, never wraps to 0");
    }

    #[test]
    fn checked_length_normal_range_is_exact() {
        let r = ContentRange::parse("0-1023").expect("parse");
        assert_eq!(r.checked_length(), Some(1024));
    }

    #[test]
    fn equal_start_end_is_a_valid_single_byte_range() {
        // Boundary for `if start > end`: `5-5` is the inclusive single
        // byte at offset 5 and MUST parse (length 1). Mutating `>` to
        // `>=` would reject this equal-bounds range. The reversed `6-5`
        // must still be rejected (so the comparison is not removed).
        let r = ContentRange::parse("5-5").expect("equal bounds is one byte");
        assert_eq!(r, ContentRange { start: 5, end: 5 });
        assert_eq!(r.length(), 1, "inclusive length of N-N is 1");
        assert_eq!(
            ContentRange::parse("6-5"),
            Err(ContentRangeParseError::Reversed),
            "a genuinely reversed range stays rejected"
        );
    }
}