weave-image 0.2.3

Thumbnail download, resize, and WebP conversion for OSINT knowledge graphs
Documentation
use std::io::Cursor;
use std::time::Duration;

use image::imageops::FilterType;
use image::{DynamicImage, ImageReader};
use sha2::{Digest, Sha256};

/// Maximum source image download size in bytes (5 MB).
const MAX_DOWNLOAD_BYTES: u64 = 5 * 1024 * 1024;

/// Timeout configured on the HTTP client used for a download (15 seconds).
const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(15);

/// Output thumbnail width in pixels.
const THUMB_WIDTH: u32 = 256;
/// Output thumbnail height in pixels.
const THUMB_HEIGHT: u32 = 256;

/// Maximum encoded thumbnail size in bytes (50 KB); larger outputs are rejected.
const MAX_OUTPUT_BYTES: usize = 50 * 1024;

/// Number of leading SHA-256 hex characters kept in thumbnail keys.
const KEY_HEX_LEN: usize = 32;

/// Result of processing a thumbnail.
///
/// Pairs the storage key derived from the source URL with the encoded
/// thumbnail bytes.
#[derive(Debug, Clone)]
pub struct ThumbnailResult {
    /// Object key for storage: `thumbnails/{sha256_hex[0..32]}.webp`
    pub key: String,
    /// WebP image bytes.
    pub data: Vec<u8>,
}

/// Errors that can occur during thumbnail processing.
#[derive(Debug)]
pub enum Error {
    /// The download could not be completed (runtime or client setup failure,
    /// network error, or non-success HTTP status); carries a description.
    Download(String),
    /// The source exceeded the download size limit; carries the size in bytes
    /// that tripped the limit (declared or actually received).
    TooLarge(u64),
    /// The downloaded bytes could not be decoded as an image; carries the
    /// decoder's message.
    Decode(String),
    /// WebP encoding failed or the encoded output exceeded the output size
    /// limit; carries a description.
    Encode(String),
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Download(msg) => write!(f, "download failed: {msg}"),
            Self::TooLarge(size) => write!(
                f,
                "source too large: {size} bytes (max {MAX_DOWNLOAD_BYTES})"
            ),
            Self::Decode(msg) => write!(f, "image decode failed: {msg}"),
            Self::Encode(msg) => write!(f, "webp encode failed: {msg}"),
        }
    }
}

// No variant wraps an underlying error value (they hold strings or a size),
// so the trait's default methods are used as-is.
impl std::error::Error for Error {}

/// Derive the storage object key for a thumbnail from its source URL.
///
/// The URL bytes are hashed with SHA-256 and the first `KEY_HEX_LEN`
/// lowercase hex characters of the digest are used, giving
/// `thumbnails/{sha256_hex(url)[0..32]}.webp`. The same URL always maps to
/// the same key.
#[must_use]
pub fn thumbnail_key(source_url: &str) -> String {
    let digest = Sha256::digest(source_url.as_bytes());
    let digest_hex = hex_encode(&digest);
    let prefix = &digest_hex[..KEY_HEX_LEN];
    format!("thumbnails/{}.webp", prefix)
}

/// Download image bytes from a URL.
///
/// Enforces a 5 MB size limit and 15s timeout.
///
/// # Errors
///
/// Returns `Error::Download` on network failure or non-success HTTP status,
/// `Error::TooLarge` if the response exceeds 5 MB.
pub fn download(url: &str) -> Result<Vec<u8>, Error> {
    let rt = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .map_err(|e| Error::Download(e.to_string()))?;

    rt.block_on(download_async(url))
}

async fn download_async(url: &str) -> Result<Vec<u8>, Error> {
    let client = reqwest::Client::builder()
        .timeout(DOWNLOAD_TIMEOUT)
        .build()
        .map_err(|e| Error::Download(e.to_string()))?;

    let response = client
        .get(url)
        .send()
        .await
        .map_err(|e| Error::Download(e.to_string()))?;

    if !response.status().is_success() {
        return Err(Error::Download(format!("HTTP {}", response.status())));
    }

    if let Some(len) = response.content_length()
        && len > MAX_DOWNLOAD_BYTES
    {
        return Err(Error::TooLarge(len));
    }

    let bytes = response
        .bytes()
        .await
        .map_err(|e| Error::Download(e.to_string()))?;

    if bytes.len() as u64 > MAX_DOWNLOAD_BYTES {
        return Err(Error::TooLarge(bytes.len() as u64));
    }

    Ok(bytes.to_vec())
}

/// Turn encoded image bytes into a WebP thumbnail.
///
/// The input is decoded, center-cropped to the thumbnail aspect ratio and
/// scaled to `THUMB_WIDTH` x `THUMB_HEIGHT`, then encoded as WebP.
///
/// # Errors
///
/// Returns `Error::Decode` if the image cannot be parsed,
/// `Error::Encode` if WebP encoding fails or output exceeds 50 KB.
pub fn resize_to_webp(bytes: &[u8]) -> Result<Vec<u8>, Error> {
    decode_image(bytes)
        .map(|source| cover_crop(&source, THUMB_WIDTH, THUMB_HEIGHT))
        .and_then(|thumb| encode_webp(&thumb))
}

/// Run the full pipeline for one URL: download, resize to a WebP thumbnail,
/// and pair the result with its storage key.
///
/// # Errors
///
/// Propagates any error from the download or resize stages unchanged.
pub fn process_thumbnail(url: &str) -> Result<ThumbnailResult, Error> {
    let data = download(url).and_then(|raw| resize_to_webp(&raw))?;
    Ok(ThumbnailResult {
        // The key depends only on the URL, never on the downloaded content.
        key: thumbnail_key(url),
        data,
    })
}

fn decode_image(bytes: &[u8]) -> Result<DynamicImage, Error> {
    let cursor = Cursor::new(bytes);
    let reader = ImageReader::new(cursor)
        .with_guessed_format()
        .map_err(|e| Error::Decode(e.to_string()))?;
    reader.decode().map_err(|e| Error::Decode(e.to_string()))
}

/// Produce a `width x height` image by cropping the center of `img` to the
/// target aspect ratio and then scaling the crop with a Lanczos3 filter.
///
/// When the source is wider than the target ratio the left and right edges
/// are trimmed; otherwise the top and bottom are trimmed.
fn cover_crop(img: &DynamicImage, width: u32, height: u32) -> DynamicImage {
    let (src_w, src_h) = (img.width(), img.height());
    let wanted_ratio = f64::from(width) / f64::from(height);
    let actual_ratio = f64::from(src_w) / f64::from(src_h);

    // The `as u32` casts deliberately drop the fractional pixel of the
    // computed crop size, hence the lint allowances.
    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
    let (left, top, crop_w, crop_h) = if actual_ratio > wanted_ratio {
        // Too wide: keep full height, trim the sides equally.
        let crop_w = (f64::from(src_h) * wanted_ratio) as u32;
        ((src_w - crop_w) / 2, 0, crop_w, src_h)
    } else {
        // Too tall (or exact match): keep full width, trim top and bottom.
        let crop_h = (f64::from(src_w) / wanted_ratio) as u32;
        (0, (src_h - crop_h) / 2, src_w, crop_h)
    };

    img.crop_imm(left, top, crop_w, crop_h)
        .resize_exact(width, height, FilterType::Lanczos3)
}

fn encode_webp(img: &DynamicImage) -> Result<Vec<u8>, Error> {
    let mut buf = Cursor::new(Vec::new());
    img.write_with_encoder(image::codecs::webp::WebPEncoder::new_lossless(&mut buf))
        .map_err(|e| Error::Encode(e.to_string()))?;

    let data = buf.into_inner();
    if data.len() > MAX_OUTPUT_BYTES {
        return Err(Error::Encode(format!(
            "output too large: {} bytes (max {MAX_OUTPUT_BYTES})",
            data.len()
        )));
    }

    Ok(data)
}

/// Render `bytes` as a lowercase hexadecimal string, two characters per byte.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}

#[cfg(test)]
mod tests {
    use super::*;
    use image::ImageFormat;

    /// Encode a blank RGB image of the given size as PNG bytes to use as
    /// input for the resize tests. An encoding failure is ignored here and
    /// surfaces as a failing assertion in the calling test.
    fn blank_png(width: u32, height: u32) -> Vec<u8> {
        let mut png = Vec::new();
        DynamicImage::new_rgb8(width, height)
            .write_to(&mut Cursor::new(&mut png), ImageFormat::Png)
            .ok();
        png
    }

    #[test]
    fn thumbnail_key_is_deterministic() {
        let url = "https://example.com/photo.jpg";
        assert_eq!(thumbnail_key(url), thumbnail_key(url));
    }

    #[test]
    fn thumbnail_key_different_urls_differ() {
        let first = thumbnail_key("https://example.com/a.jpg");
        let second = thumbnail_key("https://example.com/b.jpg");
        assert_ne!(first, second);
    }

    #[test]
    fn thumbnail_key_format() {
        let key = thumbnail_key("https://example.com/photo.jpg");
        assert!(key.starts_with("thumbnails/"));
        assert!(key.ends_with(".webp"));

        // Whatever sits between the fixed prefix and suffix is the hex part.
        let hex_part = key
            .strip_prefix("thumbnails/")
            .and_then(|rest| rest.strip_suffix(".webp"));
        assert!(hex_part.is_some());
        let hex_part = hex_part.unwrap_or_default();
        assert_eq!(hex_part.len(), KEY_HEX_LEN);
        assert!(hex_part.chars().all(|ch| ch.is_ascii_hexdigit()));
    }

    #[test]
    fn resize_to_webp_produces_valid_output() {
        let outcome = resize_to_webp(&blank_png(800, 600));
        assert!(outcome.is_ok());

        let webp = outcome.unwrap_or_default();
        assert!(!webp.is_empty());
        assert!(webp.len() <= MAX_OUTPUT_BYTES);
    }

    #[test]
    fn resize_to_webp_is_256x256() {
        let outcome = resize_to_webp(&blank_png(1024, 768));
        assert!(outcome.is_ok());
        let webp = outcome.unwrap_or_default();

        // Decode the thumbnail again to inspect its real dimensions.
        let decoded = ImageReader::new(Cursor::new(&webp))
            .with_guessed_format()
            .ok()
            .and_then(|reader| reader.decode().ok());
        assert!(decoded.is_some());

        let (out_w, out_h) = decoded.map_or((0, 0), |thumb| (thumb.width(), thumb.height()));
        assert_eq!(out_w, THUMB_WIDTH);
        assert_eq!(out_h, THUMB_HEIGHT);
    }

    #[test]
    fn resize_to_webp_portrait_image() {
        assert!(resize_to_webp(&blank_png(400, 1200)).is_ok());
    }

    #[test]
    fn resize_to_webp_square_image() {
        assert!(resize_to_webp(&blank_png(500, 500)).is_ok());
    }

    #[test]
    fn resize_to_webp_tiny_image() {
        assert!(resize_to_webp(&blank_png(16, 16)).is_ok());
    }

    #[test]
    fn resize_to_webp_invalid_bytes_fails() {
        assert!(resize_to_webp(b"not an image").is_err());
    }

    #[test]
    fn download_invalid_url_fails() {
        assert!(download("not-a-url").is_err());
    }

    #[test]
    fn hex_encode_works() {
        assert_eq!(hex_encode(&[0x00, 0xff, 0xab]), "00ffab");
    }
}