tinyzip 0.4.0

Low-level ZIP file parsing and navigation
Documentation
use std::io::{Cursor, Write};

/// The file we look up / extract in benchmarks.
///
/// The path matches one of the nested directories created by `generate`, so
/// the entry sits deep in the archive's central directory rather than at the
/// start or end — a realistic lookup target.
pub const TARGET_FILE: &str = "src/core/data/processing/transform/csv/records.csv";

/// Generate a realistic ZIP archive in memory (~2,500 files, nested paths,
/// a few multi-MB files) and return its raw bytes.
///
/// The output is fully deterministic: all pseudo-random content comes from a
/// fixed-seed LCG, so benchmark fixtures are reproducible across runs.
pub fn generate() -> Vec<u8> {
    let buf = Cursor::new(Vec::with_capacity(4 * 1024 * 1024));
    let mut w = zip::write::ZipWriter::new(buf);
    let opts = zip::write::SimpleFileOptions::default()
        .compression_method(zip::CompressionMethod::Deflated);

    // A simple LCG for deterministic pseudo-random content (no extra deps).
    let mut rng: u64 = 0xDEAD_BEEF;
    let mut next = || -> u8 {
        rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
        (rng >> 33) as u8
    };

    // Directory structure mimicking a real monorepo
    let dirs = [
        "src/core/auth/providers/oauth2/google",
        "src/core/auth/providers/oauth2/github",
        "src/core/auth/providers/saml",
        "src/core/auth/session/storage/redis",
        "src/core/auth/session/middleware",
        "src/core/data/processing/transform/csv",
        "src/core/data/processing/transform/json",
        "src/core/data/processing/validate/schema",
        "src/core/data/processing/validate/constraints",
        "src/core/data/storage/adapters/postgres",
        "src/core/data/storage/adapters/sqlite",
        "src/core/data/storage/migrations/v1",
        "src/core/data/storage/migrations/v2",
        "src/frontend/ui/components/forms/inputs",
        "src/frontend/ui/components/forms/validation",
        "src/frontend/ui/components/layout/sidebar",
        "src/frontend/ui/components/layout/header",
        "src/frontend/ui/components/charts/timeseries",
        "src/frontend/ui/components/charts/histogram",
        "src/frontend/ui/hooks/queries",
        "src/frontend/ui/hooks/mutations",
        "src/backend/api/v1/handlers/users",
        "src/backend/api/v1/handlers/projects",
        "src/backend/api/v1/middleware/ratelimit",
        "src/backend/api/v1/middleware/cors",
        "src/backend/api/v2/handlers/users",
        "src/backend/api/v2/handlers/billing",
        "src/backend/workers/jobs/email/templates",
        "src/backend/workers/jobs/export/formats",
        "tests/unit/core/auth/providers",
        "tests/unit/core/data/processing",
        "tests/unit/frontend/components",
        "tests/unit/backend/api/v1",
        "tests/unit/backend/api/v2",
        "tests/integration/e2e/auth",
        "tests/integration/e2e/billing",
        "tests/integration/load/scenarios",
        "docs/api/v1/endpoints",
        "docs/api/v2/endpoints",
        "docs/guides/deployment/kubernetes",
        "docs/guides/deployment/docker",
        "docs/architecture/decisions/records",
        "assets/images/icons/social",
        "assets/images/icons/navigation",
        "assets/fonts/latin/variable",
        "config/environments/staging",
        "config/environments/production",
        "scripts/ci/pipelines/deploy",
        "scripts/ci/pipelines/test",
    ];

    let extensions = [
        (".rs", b"use std::collections::HashMap;\nfn process(input: &[u8]) -> Vec<u8> {\n    input.iter().map(|b| b.wrapping_add(1)).collect()\n}\n" as &[u8]),
        (".ts", b"export function transform(data: Record<string, unknown>): string {\n  return JSON.stringify(data, null, 2);\n}\n"),
        (".html", b"<div class=\"container\"><section><h2>Title</h2><p>Content goes here.</p></section></div>\n"),
        (".json", b"{\"id\": 1, \"name\": \"item\", \"tags\": [\"a\", \"b\"], \"metadata\": {\"created\": \"2025-01-01\"}}\n"),
        (".css", b".container { display: flex; gap: 1rem; }\n.header { font-size: 1.5rem; color: #333; }\n"),
        (".md", b"# Module Documentation\n\nThis module handles core processing logic.\n\n## Usage\n\n```rust\nlet result = process(&input);\n```\n"),
    ];

    let mut count = 0;

    // ~2,450 small-to-medium files (49 directories x 50 files each)
    for dir in &dirs {
        for i in 0..50 {
            let (ext, template) = extensions[i % extensions.len()];
            let name = format!("{dir}/file_{i:03}{ext}");
            // Vary size (roughly 0.5KB to 13KB) by repeating the template,
            // then appending a pseudo-random suffix
            let repeats = 5 + (i * 7) % 100;
            w.start_file(&name, opts).unwrap();
            for _ in 0..repeats {
                w.write_all(template).unwrap();
            }
            // Add some unique bytes so compression ratios are realistic
            let suffix: Vec<u8> = (0..64).map(|_| next()).collect();
            w.write_all(&suffix).unwrap();
            count += 1;
        }
    }

    // The target CSV file (1000 rows) for extract benchmarks
    w.start_file(TARGET_FILE, opts).unwrap();
    w.write_all(b"id,name,email,score,department,created_at\n").unwrap();
    for i in 0..1000 {
        let line = format!(
            "{i},user_{i},user_{i}@example.com,{},{},2025-{:02}-{:02}\n",
            50 + (i * 7) % 51,
            ["engineering", "sales", "marketing", "support", "research"][i % 5],
            1 + (i % 12),
            1 + (i % 28),
        );
        w.write_all(line.as_bytes()).unwrap();
    }
    count += 1;

    // A few large files (~2MB each) — binary-ish with structure
    for i in 0..5 {
        let name = format!("assets/images/exports/batch_{i:02}/dataset.bin");
        w.start_file(&name, opts).unwrap();
        // 512 repeats of a 4KB pseudo-random block, with one extra derived
        // byte interleaved after each repeat so consecutive copies are not
        // byte-identical (keeps the compression ratio realistic).
        // NOTE: `block` itself is never mutated, and since `next()` is a u8,
        // `idx` only ever addresses the first 256 bytes of the block.
        let block: Vec<u8> = (0..4096).map(|_| next()).collect();
        for _ in 0..512 {
            w.write_all(&block).unwrap();
            let idx = next() as usize % block.len();
            let val = next();
            w.write_all(&[val ^ block[idx]]).unwrap();
        }
        count += 1;
    }

    // Root-level config files
    for name in [
        "Cargo.toml", "Cargo.lock", "README.md", "LICENSE",
        ".gitignore", "config/settings.toml", "config/environments/dev.toml",
        "config/environments/prod.toml",
    ] {
        w.start_file(name, opts).unwrap();
        let content = format!("# {name}\n").repeat(50 + name.len() * 10);
        w.write_all(content.as_bytes()).unwrap();
        count += 1;
    }

    eprintln!("Generated fixture: {count} files");
    w.finish().unwrap().into_inner()
}