// demit 1.0.0
//
// A flexible data generator for various domains.
// Documentation
use crate::common::DateTime;
use crate::domain::Record;
use rand::Rng;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};

/// A single synthetic web server access log entry.
///
/// All fields are private; instances are built by the generator methods in
/// this module and are (de)serialized through the derived serde impls, so the
/// field names and their declaration order define the serialized layout.
#[derive(Debug, Deserialize, Serialize)]
pub struct WeblogRecord {
    /// Timestamp of the request (RFC3339), as produced by `DateTime::gen_rfc3339`
    timestamp: String,
    /// Client IP address in dotted-quad form (`a.b.c.d`)
    ip_address: String,
    /// HTTP method; the generator picks one of GET, POST, PUT, DELETE, PATCH
    method: String,
    /// Request path, optionally followed by a numeric ID for `/api/` routes
    path: String,
    /// HTTP status code; the generator emits 200, 304, 404, 401, 500 or 503
    status: u16,
    /// Response time in milliseconds
    response_time_ms: u32,
    /// Client user agent string
    user_agent: String,
    /// Response size in bytes
    bytes_sent: u32,
}

impl Record for WeblogRecord {
    /// Produces `count` independently randomized records.
    ///
    /// The records are built in parallel on the rayon thread pool; every
    /// element is created by a separate call to [`WeblogRecord::random`].
    fn generate(count: usize) -> Vec<Self> {
        let mut records = Vec::with_capacity(count);
        (0..count)
            .into_par_iter()
            .map(|_slot| Self::random())
            .collect_into_vec(&mut records);
        records
    }
}

impl WeblogRecord {
    /// Builds one record with every field drawn at random.
    ///
    /// A fresh handle from `rand::rng()` is obtained per call, so this is safe
    /// to invoke from the parallel iterator used by `Record::generate`.
    fn random() -> Self {
        let mut rng = rand::rng();
        Self {
            timestamp: DateTime::gen_rfc3339(3), // dates back 3 years
            ip_address: Self::generate_ip(&mut rng),
            method: Self::generate_method(&mut rng),
            path: Self::generate_path(&mut rng),
            status: Self::generate_status(&mut rng),
            response_time_ms: Self::generate_response_time(&mut rng),
            user_agent: Self::generate_user_agent(&mut rng),
            bytes_sent: Self::generate_bytes_sent(&mut rng),
        }
    }

    /// Returns a random dotted-quad IPv4 address.
    ///
    /// The first and last octets are drawn from 1–254 (never 0 or 255); the
    /// two middle octets cover the full 0–255 range.
    fn generate_ip(rng: &mut impl Rng) -> String {
        // Inclusive ranges make the bounds explicit; the previous half-open
        // `0..255` silently excluded 255 from the middle octets.
        let first: u8 = rng.random_range(1..=254);
        let second: u8 = rng.random_range(0..=255);
        let third: u8 = rng.random_range(0..=255);
        let last: u8 = rng.random_range(1..=254);
        format!("{first}.{second}.{third}.{last}")
    }

    /// Returns an HTTP method chosen with a 70/15/8/5/2 weighting over
    /// GET/POST/PUT/DELETE/PATCH.
    fn generate_method(rng: &mut impl Rng) -> String {
        const METHODS: [&str; 5] = ["GET", "POST", "PUT", "DELETE", "PATCH"];
        const WEIGHTS: [i32; 5] = [70, 15, 8, 5, 2]; // weighted distribution

        let total_weight: i32 = WEIGHTS.iter().sum();
        // `roll` lies in 0..total_weight; walk the buckets, subtracting each
        // weight until the roll falls inside one.
        let mut roll = rng.random_range(0..total_weight);

        for (method, weight) in METHODS.iter().zip(WEIGHTS.iter()) {
            if roll < *weight {
                return method.to_string();
            }
            roll -= weight;
        }

        // Not reachable while `roll < total_weight`; kept as a safe default.
        "GET".to_string()
    }

    /// Returns a request path from a fixed list.
    ///
    /// Paths under `/api/` get a numeric ID in 1–999 appended 70% of the time.
    fn generate_path(rng: &mut impl Rng) -> String {
        const PATHS: [&str; 10] = [
            "/",
            "/api/users",
            "/api/products",
            "/api/orders",
            "/login",
            "/signup",
            "/dashboard",
            "/profile",
            "/settings",
            "/logout",
        ];

        let base_path = PATHS[rng.random_range(0..PATHS.len())];

        // Sometimes add an ID to the path
        if base_path.starts_with("/api/") && rng.random_ratio(7, 10) {
            format!("{}/{}", base_path, rng.random_range(1..1000))
        } else {
            base_path.to_string()
        }
    }

    /// Returns an HTTP status code with a 70/10/10/5/3/2 percent split over
    /// 200/304/404/401/500/503.
    fn generate_status(rng: &mut impl Rng) -> u16 {
        // Roll 1..=100 inclusive so each bucket width equals its percentage;
        // a half-open `1..100` only yields 99 values and shortchanges 503.
        match rng.random_range(1..=100) {
            1..=70 => 200,  // 70% success
            71..=80 => 304, // 10% not modified
            81..=90 => 404, // 10% not found
            91..=95 => 401, // 5% unauthorized
            96..=98 => 500, // 3% server error
            _ => 503,       // 2% service unavailable
        }
    }

    /// Returns a response time in milliseconds.
    ///
    /// 90% of requests are fast (10–499 ms), 10% are slow (500–1999 ms).
    fn generate_response_time(rng: &mut impl Rng) -> u32 {
        if rng.random_ratio(9, 10) {
            rng.random_range(10..500)
        } else {
            rng.random_range(500..2000)
        }
    }

    /// Returns one of a fixed set of browser / tool user agent strings,
    /// chosen uniformly.
    fn generate_user_agent(rng: &mut impl Rng) -> String {
        const USER_AGENTS: [&str; 6] = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (Linux; Android 11; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.120 Mobile Safari/537.36",
            "PostmanRuntime/7.28.1",
            "curl/7.64.1",
        ];
        USER_AGENTS[rng.random_range(0..USER_AGENTS.len())].to_string()
    }

    /// Returns a response size in bytes.
    ///
    /// 70% of responses are small (100 B–10 KB), 20% are medium
    /// (10 KB–100 KB) and 10% are large (100 KB–1 MB). Upper bounds are
    /// exclusive.
    fn generate_bytes_sent(rng: &mut impl Rng) -> u32 {
        // Roll 1..=100 inclusive so the bucket widths are exact percentages;
        // a half-open `1..100` leaves the "large" bucket with 9 of 99 values.
        match rng.random_range(1..=100) {
            1..=70 => rng.random_range(100..10_000),
            71..=90 => rng.random_range(10_000..100_000),
            _ => rng.random_range(100_000..1_000_000),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A generated address must consist of four dot-separated octets, each of
    /// which fits in a `u8`.
    #[test]
    fn test_generate_ip() {
        let ip = WeblogRecord::generate_ip(&mut rand::rng());

        let octets: Vec<&str> = ip.split('.').collect();
        assert_eq!(octets.len(), 4);

        // every octet must parse as a value between 0 and 255
        let unparsable = octets
            .iter()
            .filter(|octet| octet.parse::<u8>().is_err())
            .count();
        assert_eq!(unparsable, 0);
    }
}