vyre-conform 0.1.0

Conformance suite for vyre backends — proves byte-identical output to CPU reference
Documentation
//! Specification for the `decode.url` operation.
use crate::{Convention, DataType, OpSignature, OpSpec};

/// Location-agnostic operation metadata for the `decode.url` op.
///
/// The values here restate, as plain strings and enum tags, the same facts
/// that `vyre_op` in this file passes to the `OpSpec` builder: the op id,
/// version `1`, category `A`, strict strictness, and a single `Bytes` input
/// producing `Bytes`.
pub const VYRE_OP_METADATA: vyre_spec::OpMetadata = vyre_spec::OpMetadata {
    // Same id that `vyre_op` and the `GOLDEN` sample use.
    id: "decode.url",
    layer: vyre_spec::Layer::L2,
    category: vyre_spec::MetadataCategory::A,
    // Same version number that `vyre_op` passes to `.version(..)`.
    version: 1,
    description: "decode url",
    // Textual form of the one-`Bytes`-in, `Bytes`-out signature.
    signature: "(Bytes) -> Bytes",
    strictness: "strict",
    // NOTE(review): identical to `signature` here; how the two fields differ
    // in general is not visible from this file.
    archetype_signature: "(Bytes) -> Bytes",
};

/// Golden samples for this op.
///
/// Currently a single sample: an input containing no `%` byte, which the
/// reference `cpu_fn` copies through unchanged, so `expected` equals `input`.
pub const GOLDEN: &[vyre_spec::GoldenSample] = &[vyre_spec::GoldenSample {
    // Same id as `VYRE_OP_METADATA.id`.
    op_id: "decode.url",
    input: b"hello",
    expected: b"hello",
    reason: "no-escape input round-trips byte-for-byte",
}];

/// Known-answer tests for this op, derived from RFC 3986 §2 percent-encoding rules.
///
/// Each vector pairs a percent-encoded `input` with the `expected` decoded
/// bytes; `source` records where the expectation comes from. Together they
/// cover the empty input, escape-free input, single escapes, consecutive
/// escapes, and both uppercase and lowercase hex digits.
pub const KAT: &[vyre_spec::KatVector] = &[
    // Zero-length input decodes to zero-length output.
    vyre_spec::KatVector {
        input: b"",
        expected: b"",
        source: "RFC 3986 §2 (empty)",
    },
    // No `%` at all: bytes pass through unchanged.
    vyre_spec::KatVector {
        input: b"hello",
        expected: b"hello",
        source: "RFC 3986 §2 (unreserved)",
    },
    // One escape in the middle of literal text: `%20` is byte 0x20 (space).
    vyre_spec::KatVector {
        input: b"hello%20world",
        expected: b"hello world",
        source: "RFC 3986 §2.1 (percent-encoded SP)",
    },
    // `%2B` is byte 0x2B (`+`), written with an uppercase hex digit.
    vyre_spec::KatVector {
        input: b"a%2Bb",
        expected: b"a+b",
        source: "RFC 3986 §2.1 (percent-encoded '+')",
    },
    // Four back-to-back escapes with no literal bytes in between.
    vyre_spec::KatVector {
        input: b"%2F%3A%3F%23",
        expected: b"/:?#",
        source: "RFC 3986 §2.2 (gen-delims)",
    },
    // Lowercase hex digits; the result is the raw bytes 0xE2 0x98 0x83.
    vyre_spec::KatVector {
        input: b"%e2%98%83",
        expected: b"\xe2\x98\x83",
        source: "RFC 3986 §2.5 (UTF-8 snowman)",
    },
    // One uppercase-hex escape (`%2B`) and one lowercase-hex escape (`%3d`)
    // in the same input.
    vyre_spec::KatVector {
        input: b"mixed%2Bcase%3d",
        expected: b"mixed+case=",
        source: "hand-verified mixed case literals",
    },
];

/// Adversarial inputs for this op.
///
/// These entries carry only an `input` and a `reason`; no expected output is
/// recorded here. For reference, `cpu_fn` in this file copies every malformed
/// escape below through byte-for-byte (`%` stays `%`, `%2` stays `%2`,
/// `%GG` stays `%GG`) and decodes `%20%` to a space followed by `%`.
pub const ADVERSARIAL: &[vyre_spec::AdversarialInput] = &[
    vyre_spec::AdversarialInput {
        input: b"",
        reason: "empty input exercises zero-length branch",
    },
    // `%` is the final byte, so no hex digits follow it.
    vyre_spec::AdversarialInput {
        input: b"%",
        reason: "truncated escape — percent with no hex digits",
    },
    // Only one byte follows the `%`; a full escape needs two.
    vyre_spec::AdversarialInput {
        input: b"%2",
        reason: "truncated escape — percent with one hex digit",
    },
    // NOTE(review): the reason text says "rejected", but `cpu_fn` returns
    // these three bytes unchanged rather than signalling an error — confirm
    // which behavior is intended.
    vyre_spec::AdversarialInput {
        input: b"%GG",
        reason: "invalid hex after percent must be rejected, never silently zeroed",
    },
    // A well-formed escape immediately followed by a dangling `%`.
    vyre_spec::AdversarialInput {
        input: b"%20%",
        reason: "valid prefix followed by truncated escape — state after valid escape",
    },
];

/// Assemble the [`OpSpec`] describing `decode.url`.
///
/// The spec wires together the CPU reference (`cpu_fn`), the WGSL shader
/// (`wgsl_fn`, also registered as the `"category_a_handwritten"` alternate),
/// a one-`Bytes`-in / `Bytes`-out signature, a `Bounded` law over the full
/// `u32` range, and a fixed set of boundary values and equivalence classes.
///
/// # Panics
///
/// Panics with a `Fix:` message if the builder chain does not yield a spec.
#[inline]
pub fn vyre_op() -> OpSpec {
    use crate::spec::types::{BoundaryValue, EquivalenceClass, Strictness};

    const OP_ID: &str = "decode.url";

    // Single byte buffer in, single byte buffer out.
    let signature = OpSignature {
        inputs: vec![DataType::Bytes],
        output: DataType::Bytes,
    };

    // The op lists itself as its only composition member.
    let category = crate::Category::A {
        composition_of: vec![OP_ID],
    };

    let laws = vec![crate::spec::law::AlgebraicLaw::Bounded {
        lo: 0,
        hi: u32::MAX,
    }];

    let boundary_values = vec![
        BoundaryValue {
            label: "empty",
            inputs: vec![0],
        },
        BoundaryValue {
            label: "single_element",
            inputs: vec![1],
        },
        BoundaryValue {
            label: "boundary",
            inputs: vec![255],
        },
        BoundaryValue {
            label: "max",
            inputs: vec![u32::MAX],
        },
    ];

    let equivalence_classes = vec![
        EquivalenceClass::specific("empty input", vec![0]),
        EquivalenceClass::specific("typical input", vec![42]),
        EquivalenceClass::specific("boundary input", vec![255]),
    ];

    // Builder calls stay in their original order; only the argument
    // construction above was pulled out of the chain.
    OpSpec::builder(OP_ID)
        .signature(signature)
        .cpu_fn(cpu_fn)
        .wgsl_fn(wgsl_fn)
        .category(category)
        .laws(laws)
        .strictness(Strictness::Strict)
        .version(1)
        .alt_wgsl_fns(vec![("category_a_handwritten", wgsl_fn)])
        .convention(Convention::V1)
        .boundary_values(boundary_values)
        .equivalence_classes(equivalence_classes)
        .expect("Fix: checked-in conform spec must satisfy the typestate builder")
}

/// CPU reference implementation of RFC 3986 percent-decoding.
///
/// Walks `input` front to back and emits one output byte per step:
///
/// * `%HH`, where both `H` are hex digits (either case), becomes the single
///   byte `0xHH` and consumes three input bytes.
/// * Any other byte is copied unchanged and consumes one input byte. This
///   includes a `%` that is followed by fewer than two bytes or by
///   non-hex bytes, so malformed escapes are preserved literally rather
///   than dropped.
///
/// Literal bytes must always be emitted: an earlier version only produced
/// output for valid escapes, so `"hello"` decoded to `""`.
#[inline]
pub fn cpu_fn(input: &[u8]) -> Vec<u8> {
    // Output is never longer than the input.
    let mut decoded = Vec::with_capacity(input.len());
    let mut remaining = input;

    while let Some((&byte, tail)) = remaining.split_first() {
        // A well-formed escape needs `%` plus at least two following bytes,
        // both of which must be hex digits.
        let escape = match (byte, tail) {
            (b'%', [hi, lo, ..]) => hex_value(*hi).zip(hex_value(*lo)),
            _ => None,
        };

        match escape {
            Some((hi, lo)) => {
                decoded.push((hi << 4) | lo);
                // Skip the two hex digits that were just consumed.
                remaining = &tail[2..];
            }
            None => {
                decoded.push(byte);
                remaining = tail;
            }
        }
    }

    decoded
}

/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its numeric value `0..=15`.
///
/// Returns `None` for every other byte.
fn hex_value(value: u8) -> Option<u8> {
    let digit = match value {
        b'0'..=b'9' => value - b'0',
        // Setting bit 0x20 folds an ASCII uppercase letter onto its
        // lowercase form, so both cases share one arm.
        b'a'..=b'f' | b'A'..=b'F' => (value | 0x20) - b'a' + 10,
        _ => return None,
    };
    Some(digit)
}

/// WGSL shader source for `decode.url`.
///
/// The returned snippet defines two WGSL functions:
///
/// * `hex_value(value)` — numeric value of an ASCII hex digit, or `-1` when
///   `value` is not a hex digit.
/// * `vyre_op(index, input_len)` — the decoded byte at output position
///   `index`, or `0u` when `index` lies past the end of the decoded output.
///
/// `vyre_op` walks the input exactly like the CPU reference `cpu_fn`: a
/// well-formed `%HH` escape yields one decoded byte and consumes three input
/// bytes; every other byte (including a `%` that starts a truncated or
/// non-hex escape) is emitted unchanged and consumes one input byte.
///
/// The previous shader only counted decoded escapes as output bytes and
/// stopped two bytes before the end of the input, so literal bytes were never
/// produced and the GPU result could not match `cpu_fn` (for example `hello`
/// yielded `0` at every index).
///
/// The snippet reads from `input.data`, which it does not declare itself —
/// presumably the harness supplies that binding; verify against the caller.
#[inline]
pub fn wgsl_fn() -> String {
    r#"
fn hex_value(value: u32) -> i32 {
    if (value >= 48u && value <= 57u) { return i32(value - 48u); }
    if (value >= 65u && value <= 70u) { return i32(value - 55u); }
    if (value >= 97u && value <= 102u) { return i32(value - 87u); }
    return -1;
}

fn vyre_op(index: u32, input_len: u32) -> u32 {
    var seen = 0u;
    var cursor = 0u;
    loop {
        if (cursor >= input_len) { break; }
        // Default: pass the current byte through unchanged.
        var value = input.data[cursor];
        var step = 1u;
        // 37u is '%'. An escape needs two more bytes after it; the
        // subtraction cannot wrap because cursor < input_len here.
        if (value == 37u && (input_len - cursor) > 2u) {
            let hi = hex_value(input.data[cursor + 1u]);
            let lo = hex_value(input.data[cursor + 2u]);
            if (hi >= 0 && lo >= 0) {
                value = (u32(hi) << 4u) | u32(lo);
                step = 3u;
            }
        }
        if (seen == index) { return value; }
        seen = seen + 1u;
        cursor = cursor + step;
    }
    return 0u;
}
"#
    .to_string()
}