vyre-conform 0.1.0

//! CPU BFS reference for graph operations.

use std::collections::VecDeque;

use crate::spec::types::{Convention, DataType, OpSignature, OpSpec};

/// Location-agnostic operation metadata.
pub const VYRE_OP_METADATA: vyre_spec::OpMetadata = vyre_spec::OpMetadata {
    id: "graph.bfs",
    layer: vyre_spec::Layer::L3,
    category: vyre_spec::MetadataCategory::A,
    version: 1,
    description: "graph bfs",
    signature: "(Bytes) -> Bytes",
    strictness: "strict",
    archetype_signature: "(Bytes) -> Bytes",
};

/// Golden samples for this op.
pub const GOLDEN: &[vyre_spec::GoldenSample] = &[vyre_spec::GoldenSample {
    op_id: "graph.bfs",
    input: &[],
    expected: &[0x00, 0x00, 0x00, 0x00],
    reason: "empty graph frame is rejected to the zero sentinel",
}];

/// Known-answer tests for this op.
///
/// The CPU reference reads a `VYRG`-framed graph, runs BFS from the
/// first declared source, and emits `(source, node, depth)` triples.
/// Invalid frames return the 4-byte zero sentinel.
pub const KAT: &[vyre_spec::KatVector] = &[
    vyre_spec::KatVector {
        input: &[],
        expected: &[0x00, 0x00, 0x00, 0x00],
        source: "empty input fails MAGIC check → ValidationError::InvalidFrame → zero sentinel",
    },
    vyre_spec::KatVector {
        input: b"VYR",
        expected: &[0x00, 0x00, 0x00, 0x00],
        source: "truncated magic (3 of 4 bytes) fails MAGIC length check → zero sentinel",
    },
    vyre_spec::KatVector {
        // VYRG + node_count=2, edge_count=1, source_count=1, max_depth=1
        // + edge (0,1) + source 0. BFS emits (0, 1, 1).
        input: &[
            0x56, 0x59, 0x52, 0x47, // "VYRG"
            0x02, 0x00, 0x00, 0x00, // node_count = 2
            0x01, 0x00, 0x00, 0x00, // edge_count = 1
            0x01, 0x00, 0x00, 0x00, // source_count = 1
            0x01, 0x00, 0x00, 0x00, // max_depth = 1
            0x00, 0x00, 0x00, 0x00, // edge from = 0
            0x01, 0x00, 0x00, 0x00, // edge to = 1
            0x00, 0x00, 0x00, 0x00, // source = 0
        ],
        expected: &[
            0x00, 0x00, 0x00, 0x00, // source = 0
            0x01, 0x00, 0x00, 0x00, // reached node = 1
            0x01, 0x00, 0x00, 0x00, // depth = 1
        ],
        source: "2-node 1-edge graph BFS from source 0: reaches node 1 at depth 1 (hand-traced against reachable_from at line 166)",
    },
    vyre_spec::KatVector {
        // VYRG + header (4 nodes, 3 edges, 1 source, max_depth 8)
        // + edges (0,1), (1,2), (0,3) + source 0.
        // Taken from the existing unit test `computes_single_source_bfs_frame`
        // at line 294, which already pins the expected BFS trace.
        input: &[
            0x56, 0x59, 0x52, 0x47, // "VYRG"
            0x04, 0x00, 0x00, 0x00, // node_count = 4
            0x03, 0x00, 0x00, 0x00, // edge_count = 3
            0x01, 0x00, 0x00, 0x00, // source_count = 1
            0x08, 0x00, 0x00, 0x00, // max_depth = 8
            0x00, 0x00, 0x00, 0x00, // edge 0 from
            0x01, 0x00, 0x00, 0x00, // edge 0 to
            0x01, 0x00, 0x00, 0x00, // edge 1 from
            0x02, 0x00, 0x00, 0x00, // edge 1 to
            0x00, 0x00, 0x00, 0x00, // edge 2 from
            0x03, 0x00, 0x00, 0x00, // edge 2 to
            0x00, 0x00, 0x00, 0x00, // source = 0
        ],
        expected: &[
            // (0, 1, 1)
            0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
            // (0, 3, 1)
            0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
            // (0, 2, 2)
            0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
        ],
        source: "4-node BFS trace from unit test `computes_single_source_bfs_frame` — (0,1,1), (0,3,1), (0,2,2) in queue-pop order",
    },
];

/// Adversarial inputs for this op.
pub const ADVERSARIAL: &[vyre_spec::AdversarialInput] = &[vyre_spec::AdversarialInput {
    input: b"VYR",
    reason: "truncated graph magic header exercises frame rejection",
}];

pub(crate) const MAGIC: &[u8; 4] = b"VYRG";
const HEADER_WORDS: usize = 4;
/// Maximum graph nodes accepted by CPU graph references.
pub const MAX_GRAPH_NODES: usize = 1_000_000;

/// Graph frame validation failure.
#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
pub enum ValidationError {
    /// The framed graph declares too many nodes for bounded CPU validation.
    #[error("GraphTooLarge: graph node count {actual} exceeds limit {max}. Fix: split the graph into bounded shards.")]
    GraphTooLarge {
        /// Declared graph node count.
        actual: usize,
        /// Maximum accepted graph node count.
        max: usize,
    },
    /// The graph frame is malformed.
    #[error("invalid graph frame. Fix: encode VYRG header, bounded counts, edges, and sources.")]
    InvalidFrame,
}

/// Parsed graph input for BFS traversal conformance testing.
#[derive(Debug)]
pub struct GraphInput {
    /// Number of nodes in the framed graph.
    pub node_count: usize,
    /// Directed graph edges.
    pub edges: Vec<(u32, u32)>,
    /// BFS source nodes.
    pub sources: Vec<u32>,
    /// Maximum traversal depth.
    pub max_depth: u32,
}

/// CPU reference for single-source BFS.
///
/// Returns a zero-padded output on invalid or empty input.
#[inline]
pub fn cpu(input: &[u8]) -> Vec<u8> {
    let Ok(graph) = parse_graph_input_checked(input) else {
        return vec![0; 4];
    };
    let Some(&source) = graph.sources.first() else {
        return vec![0; 4];
    };
    match reachable_from(&graph, source) {
        Ok(reached) => encode_reached(&reached),
        Err(_) => vec![0; 4],
    }
}

/// Parse the canonical graph conformance frame.
///
/// Format: magic `VYRG`, then little-endian u32 words:
/// `node_count, edge_count, source_count, max_depth`, `edge_count` pairs,
/// followed by `source_count` source node ids.
/// Parse raw bytes into a structured graph input.
#[inline]
pub fn parse_graph_input(input: &[u8]) -> Option<GraphInput> {
    parse_graph_input_checked(input).ok()
}

/// Parse the canonical graph conformance frame with explicit validation errors.
///
/// Returns [`ValidationError::GraphTooLarge`] before allocating adjacency
/// storage when the raw frame declares more than [`MAX_GRAPH_NODES`] nodes.
#[inline]
pub fn parse_graph_input_checked(input: &[u8]) -> Result<GraphInput, ValidationError> {
    if input.len() < MAGIC.len() || &input[..MAGIC.len()] != MAGIC {
        return Err(ValidationError::InvalidFrame);
    }
    let words = words(&input[MAGIC.len()..]).ok_or(ValidationError::InvalidFrame)?;
    if words.len() < HEADER_WORDS {
        return Err(ValidationError::InvalidFrame);
    }
    let node_count = words[0] as usize;
    if node_count > MAX_GRAPH_NODES {
        return Err(ValidationError::GraphTooLarge {
            actual: node_count,
            max: MAX_GRAPH_NODES,
        });
    }
    let edge_count = words[1] as usize;
    let source_count = words[2] as usize;
    let max_depth = words[3];
    let required = HEADER_WORDS
        .checked_add(
            edge_count
                .checked_mul(2)
                .ok_or(ValidationError::InvalidFrame)?,
        )
        .ok_or(ValidationError::InvalidFrame)?
        .checked_add(source_count)
        .ok_or(ValidationError::InvalidFrame)?;
    if words.len() < required {
        return Err(ValidationError::InvalidFrame);
    }

    let mut edges = Vec::with_capacity(edge_count);
    let mut offset = HEADER_WORDS;
    for _ in 0..edge_count {
        let from = words[offset];
        let to = words[offset + 1];
        if from as usize >= node_count || to as usize >= node_count {
            return Err(ValidationError::InvalidFrame);
        }
        edges.push((from, to));
        offset += 2;
    }

    let mut sources = Vec::with_capacity(source_count);
    for &source in &words[offset..offset + source_count] {
        if source as usize >= node_count {
            return Err(ValidationError::InvalidFrame);
        }
        sources.push(source);
    }

    Ok(GraphInput {
        node_count,
        edges,
        sources,
        max_depth,
    })
}

/// Compute nodes reachable from `source`.
/// BFS from a source node, returning (parent, node, depth) triples.
#[inline]
pub fn reachable_from(
    graph: &GraphInput,
    source: u32,
) -> Result<Vec<(u32, u32, u32)>, ValidationError> {
    if graph.node_count > MAX_GRAPH_NODES {
        return Err(ValidationError::GraphTooLarge {
            actual: graph.node_count,
            max: MAX_GRAPH_NODES,
        });
    }
    if source as usize >= graph.node_count {
        return Err(ValidationError::InvalidFrame);
    }
    let mut adjacency = vec![Vec::<u32>::new(); graph.node_count];
    for &(from, to) in &graph.edges {
        adjacency[from as usize].push(to);
    }

    let mut reached = Vec::new();
    let mut visited = vec![false; graph.node_count];
    let mut queue = VecDeque::new();
    visited[source as usize] = true;
    queue.push_back((source, 0u32));

    while let Some((node, depth)) = queue.pop_front() {
        if node != source {
            reached.push((source, node, depth));
        }
        if depth >= graph.max_depth {
            continue;
        }
        for &target in &adjacency[node as usize] {
            let target_idx = target as usize;
            if !visited[target_idx] {
                visited[target_idx] = true;
                queue.push_back((target, depth.saturating_add(1)));
            }
        }
    }
    Ok(reached)
}

/// Encode reached tuples as contiguous little-endian `(source, node, depth)` words.
/// Encode reachability results back into bytes.
#[inline]
pub fn encode_reached(reached: &[(u32, u32, u32)]) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(reached.len() * 12);
    for &(source, node, depth) in reached {
        bytes.extend_from_slice(&source.to_le_bytes());
        bytes.extend_from_slice(&node.to_le_bytes());
        bytes.extend_from_slice(&depth.to_le_bytes());
    }
    bytes
}

fn words(input: &[u8]) -> Option<Vec<u32>> {
    if input.len() % 4 != 0 {
        return None;
    }
    Some(
        input
            .chunks_exact(4)
            .map(|word| u32::from_le_bytes([word[0], word[1], word[2], word[3]]))
            .collect(),
    )
}

fn wgsl() -> String {
    "fn vyre_op(index: u32, input_len: u32) -> u32 {
        if index >= input_len { return 0u; }
        return input.data[index];
    }"
    .to_string()
}

/// Build the BFS conformance spec.
/// Build the conformance specification for BFS.
#[inline]
pub fn vyre_op() -> OpSpec {
    let id = "graph.bfs";
    OpSpec::builder(id)
        .signature(OpSignature {
            inputs: vec![DataType::Bytes],
            output: DataType::Bytes,
        })
        .cpu_fn(cpu)
        .wgsl_fn(wgsl)
        .category(crate::Category::A {
            composition_of: vec![id],
        })
        .laws(vec![crate::spec::law::AlgebraicLaw::Bounded {
            lo: 0,
            hi: u32::MAX,
        }])
        .strictness(crate::spec::types::Strictness::Strict)
        .version(1)
        .alt_wgsl_fns(vec![("category_a_handwritten", wgsl)])
        .convention(Convention::default())
        .workgroup_size(Some(1))
        .boundary_values(vec![
            crate::spec::types::BoundaryValue {
                label: "empty",
                inputs: vec![0],
            },
            crate::spec::types::BoundaryValue {
                label: "single_element",
                inputs: vec![1],
            },
            crate::spec::types::BoundaryValue {
                label: "boundary",
                inputs: vec![255],
            },
            crate::spec::types::BoundaryValue {
                label: "max",
                inputs: vec![u32::MAX],
            },
        ])
        .equivalence_classes(vec![
            crate::spec::types::EquivalenceClass::specific("empty input", vec![0]),
            crate::spec::types::EquivalenceClass::specific("typical input", vec![42]),
            crate::spec::types::EquivalenceClass::specific("boundary input", vec![255]),
        ])
        .expect("Fix: checked-in conform spec must satisfy the typestate builder")
}

#[cfg(test)]
mod tests {

    use super::{cpu, encode_reached, MAGIC};

    #[test]
    fn computes_single_source_bfs_frame() {
        let mut input = MAGIC.to_vec();
        for word in [4u32, 3, 1, 8, 0, 1, 1, 2, 0, 3, 0] {
            input.extend_from_slice(&word.to_le_bytes());
        }
        assert_eq!(
            cpu(&input),
            encode_reached(&[(0, 1, 1), (0, 3, 1), (0, 2, 2)])
        );
    }
}

#[cfg(test)]
mod proptests {
    #[test]
    fn coverage_artifacts_are_registered() {
        assert!(!super::KAT.is_empty());
        assert!(!super::ADVERSARIAL.is_empty());
    }
}