// trueno 0.17.1
//
// High-performance SIMD compute library with GPU support for matrix operations.
// Documentation
//! ComputeBrick: Self-verifying, token-centric compute unit.
//!
//! Bundles an operation with assertions, budget, and verification.
//! Also includes BrickLayer for composing multiple bricks.

use std::fmt;
use std::marker::PhantomData;
use std::time::Instant;

use super::budget::TokenBudget;
use super::types::{
    AssertionResult, Backend, BrickError, BrickVerification, ComputeAssertion, ComputeOp,
};
use super::TokenResult;

/// A compute operation packaged with its assertions, token budget, and
/// execution backend.
///
/// Instances are created with `ComputeBrick::new` and configured through the
/// consuming builder methods defined on the type.
pub struct ComputeBrick<Op>
where
    Op: ComputeOp,
{
    /// The wrapped compute operation.
    op: Op,
    /// Falsifiable assertions registered on this brick.
    assertions: Vec<ComputeAssertion>,
    /// Token-centric performance budget.
    budget: TokenBudget,
    /// Backend handed to the operation when it is executed.
    backend: Backend,
    /// When `true`, exceeding the budget is reported as an error instead of
    /// only being recorded in the result.
    enforce_budget: bool,
    /// Zero-sized marker tied to `Op`.
    _phantom: PhantomData<Op>,
}

impl<Op: ComputeOp> ComputeBrick<Op> {
    /// Wrap `op` in a brick with default settings.
    ///
    /// Defaults: no assertions, `TokenBudget::default()`, `Backend::Auto`,
    /// and budget enforcement disabled.
    pub fn new(op: Op) -> Self {
        let assertions = Vec::new();
        let budget = TokenBudget::default();
        Self {
            op,
            assertions,
            budget,
            backend: Backend::Auto,
            enforce_budget: false,
            _phantom: PhantomData,
        }
    }

    /// Register an equivalence assertion against the `baseline` backend.
    #[must_use]
    pub fn assert_equiv(mut self, baseline: Backend) -> Self {
        let assertion = ComputeAssertion::equiv(baseline);
        self.assertions.push(assertion);
        self
    }

    /// Register an equivalence assertion against `baseline` using a custom
    /// `tolerance`.
    #[must_use]
    pub fn assert_equiv_with_tolerance(mut self, baseline: Backend, tolerance: f64) -> Self {
        let assertion = ComputeAssertion::equiv_with_tolerance(baseline, tolerance);
        self.assertions.push(assertion);
        self
    }

    /// Register a bounds assertion built from `min` and `max`.
    #[must_use]
    pub fn assert_bounds(mut self, min: f64, max: f64) -> Self {
        let assertion = ComputeAssertion::bounds(min, max);
        self.assertions.push(assertion);
        self
    }

    /// Register a finiteness assertion (no NaN/Inf in output).
    #[must_use]
    pub fn assert_finite(mut self) -> Self {
        let assertion = ComputeAssertion::finite();
        self.assertions.push(assertion);
        self
    }

    /// Replace the budget with one derived from a throughput target
    /// (tokens/second).
    #[must_use]
    pub fn budget_tok_per_sec(self, tps: f64) -> Self {
        Self { budget: TokenBudget::from_throughput(tps), ..self }
    }

    /// Replace the budget with one derived from a latency target
    /// (microseconds/token).
    #[must_use]
    pub fn budget_us_per_tok(self, us: f64) -> Self {
        Self { budget: TokenBudget::from_latency(us), ..self }
    }

    /// Replace the budget with a fully specified configuration.
    #[must_use]
    pub fn budget(self, budget: TokenBudget) -> Self {
        Self { budget, ..self }
    }

    /// Select the backend passed to the operation on execution.
    #[must_use]
    pub fn backend(self, backend: Backend) -> Self {
        Self { backend, ..self }
    }

    /// Choose whether exceeding the budget makes `run` fail.
    ///
    /// The default is `false`: the budget outcome is only reported.
    #[must_use]
    pub fn enforce_budget(self, enforce: bool) -> Self {
        Self { enforce_budget: enforce, ..self }
    }

    /// Name of the brick, as reported by the wrapped operation.
    pub fn name(&self) -> &'static str {
        self.op.name()
    }

    /// Copy of the budget currently configured on this brick.
    pub fn get_budget(&self) -> TokenBudget {
        self.budget
    }

    /// Backend currently configured on this brick.
    pub fn get_backend(&self) -> Backend {
        self.backend
    }

    /// Assertions registered so far, in insertion order.
    pub fn get_assertions(&self) -> &[ComputeAssertion] {
        self.assertions.as_slice()
    }

    /// Execute the operation, time it, and compare the measured per-token
    /// latency against the budget (Jidoka gate).
    ///
    /// The registered assertions are not evaluated here; only the budget is
    /// checked. When the operation reports a token count that is not greater
    /// than zero, `us_per_token` is the whole elapsed time.
    ///
    /// # Errors
    ///
    /// Propagates any error produced by the operation's `execute`. Returns
    /// `BrickError::BudgetExceeded` when enforcement is enabled and the budget
    /// is not met.
    pub fn run(&self, input: Op::Input) -> Result<TokenResult<Op::Output>, BrickError> {
        const US_PER_SEC: f64 = 1_000_000.0;

        // The token count is read up front because `execute` consumes `input`.
        let tokens_processed = self.op.tokens(&input);

        // Only the call to `execute` is timed.
        let timer = Instant::now();
        let output = self.op.execute(input, self.backend)?;
        let elapsed_us = timer.elapsed().as_secs_f64() * US_PER_SEC;

        let us_per_token = if tokens_processed > 0 {
            elapsed_us / tokens_processed as f64
        } else {
            elapsed_us
        };
        let tokens_per_sec = if elapsed_us > 0.0 {
            tokens_processed as f64 * US_PER_SEC / elapsed_us
        } else {
            // A zero-duration measurement is reported as unbounded throughput.
            f64::INFINITY
        };

        let budget_met = self.budget.is_met(us_per_token);
        let budget_utilization = self.budget.utilization(us_per_token);

        if !budget_met && self.enforce_budget {
            return Err(BrickError::BudgetExceeded {
                limit_us: self.budget.us_per_token,
                actual_us: us_per_token,
                utilization: budget_utilization * 100.0,
            });
        }

        Ok(TokenResult {
            output,
            tokens_processed,
            us_per_token,
            tokens_per_sec,
            budget_met,
            budget_utilization,
        })
    }

    /// Check the brick's assertion set without executing the operation.
    ///
    /// An empty assertion list fails with a single `popperian_falsifiability`
    /// result. Otherwise every registered assertion is reported as passed,
    /// because evaluating them requires input data that is not available here.
    pub fn verify(&self) -> BrickVerification {
        let timer = Instant::now();
        let elapsed_us = || timer.elapsed().as_secs_f64() * 1_000_000.0;

        // Popperian requirement: a brick must carry at least one assertion.
        if self.assertions.is_empty() {
            let violation = AssertionResult {
                assertion: ComputeAssertion::Custom {
                    name: "popperian_falsifiability".to_string(),
                },
                passed: false,
                error: Some("No assertions defined - violates Popperian falsifiability".to_string()),
            };
            return BrickVerification {
                passed: false,
                assertion_results: vec![violation],
                verification_us: elapsed_us(),
            };
        }

        // Structural validation only: each assertion is echoed back as passed.
        let mut assertion_results: Vec<AssertionResult> =
            Vec::with_capacity(self.assertions.len());
        for assertion in &self.assertions {
            assertion_results.push(AssertionResult {
                assertion: assertion.clone(),
                passed: true,
                error: None,
            });
        }

        let passed = assertion_results.iter().all(|result| result.passed);

        BrickVerification { passed, assertion_results, verification_us: elapsed_us() }
    }
}

impl<Op: ComputeOp + Clone> Clone for ComputeBrick<Op> {
    fn clone(&self) -> Self {
        Self {
            op: self.op.clone(),
            assertions: self.assertions.clone(),
            budget: self.budget,
            backend: self.backend,
            enforce_budget: self.enforce_budget,
            _phantom: PhantomData,
        }
    }
}

impl<Op: ComputeOp> fmt::Debug for ComputeBrick<Op> {
    /// Format a summary of the brick.
    ///
    /// The operation is shown by name and the assertions by count, so `Op`
    /// itself does not need to implement `Debug`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let assertion_count = self.assertions.len();

        let mut summary = f.debug_struct("ComputeBrick");
        summary.field("name", &self.op.name());
        summary.field("backend", &self.backend);
        summary.field("budget", &self.budget);
        summary.field("assertions", &assertion_count);
        summary.field("enforce_budget", &self.enforce_budget);
        summary.finish()
    }
}

// ============================================================================
// LLM Transformer Fused Operations (PMAT-PERF-009)
// BrickLayer: Compose multiple bricks
// ============================================================================

/// A layer of compute bricks that execute sequentially.
///
/// The layer records only each entry's name and throughput budget
/// (tokens/second); it does not store or run the bricks themselves.
/// Throughput ceiling = min(component throughputs).
#[derive(Debug, Default)]
pub struct BrickLayer {
    /// Recorded entries as `(name, budget_tok_per_sec)`, in insertion order.
    bricks: Vec<(String, f64)>,
}

impl BrickLayer {
    /// Create a new empty layer.
    pub fn new() -> Self {
        Self::default()
    }

    /// Add a brick to the layer.
    ///
    /// Records the brick's name and the `tokens_per_sec` value of its budget.
    #[must_use]
    pub fn with_brick<Op: ComputeOp>(mut self, brick: &ComputeBrick<Op>) -> Self {
        self.bricks.push((brick.name().to_string(), brick.budget.tokens_per_sec));
        self
    }

    /// Add a named entry with a throughput budget (tokens/second).
    #[must_use]
    pub fn with_named(mut self, name: &str, budget_tok_per_sec: f64) -> Self {
        self.bricks.push((name.to_string(), budget_tok_per_sec));
        self
    }

    /// Get the throughput ceiling (bottleneck).
    ///
    /// Layer throughput = min(component throughputs). NaN budgets are ignored
    /// (`f64::min` returns the non-NaN operand). An empty layer, or one whose
    /// budgets are all NaN, yields `f64::INFINITY`.
    pub fn throughput_ceiling(&self) -> f64 {
        self.bricks.iter().map(|(_, tps)| *tps).fold(f64::INFINITY, f64::min)
    }

    /// Get the name of the bottleneck entry, i.e. the one with the lowest
    /// throughput budget.
    ///
    /// NaN budgets are skipped so the result always names the entry that
    /// determines `throughput_ceiling`, regardless of insertion order. When
    /// several entries share the minimum, the first one added is returned.
    /// Returns `None` for an empty layer or when every budget is NaN.
    pub fn bottleneck(&self) -> Option<&str> {
        self.bricks
            .iter()
            // Without this filter a NaN entry compares as "equal" to everything
            // and can win the minimum purely because it was inserted first.
            .filter(|(_, tps)| !tps.is_nan())
            // After filtering, `partial_cmp` is always `Some`; the fallback only
            // keeps this free of a panic path.
            .min_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(name, _)| name.as_str())
    }

    /// Get all recorded entries with their budgets, in insertion order.
    pub fn bricks(&self) -> &[(String, f64)] {
        &self.bricks
    }
}