// tokitai-operator 0.1.0

// Verified DL kernel compiler: formally-checked GEMM, p-adic, sheaf, contract-carrying ops. Paper-artifact grade.
// Documentation
//! Backend surface: CPU, GPU scaffold, ROCm/HIP, memory, conformance.
//!
//! This module tree is the executable layer of the project. The
//! `ComputeHardware` trait is the abstract interface; the
//! concrete implementations are `CpuScalarBackend`,
//! `GpuScaffoldBackend`, and (under `--features rocm-hip`) the
//! `hip_*` modules.
//!
//! - `cpu` — `CpuScalarBackend`, the default reference path.
//! - `gpu` — `GpuScaffoldBackend`, the CPU-side stub used in the
//!   default build.
//! - `hip_*` (gated on `rocm-hip`) — real HIP kernels and
//!   benchmark wrappers.
//! - `conformance` — cross-backend conformance runner.
//! - `memory` — `MemorySpace` and `Layout` types.
//! - `hardware` — `ComputeHardware` trait and capability types.
//!
//! Public types: `CpuScalarBackend`, `GpuScaffoldBackend`,
//! `BackendCapabilities`, `TensorStore<T>`, `MemorySpace`, `Layout`.
//!
pub mod conformance;
pub mod cpu;
pub mod f16_convert;
pub mod gpu;
pub mod hardware;
#[cfg(feature = "rocm-hip")]
pub mod hip_dense;
#[cfg(feature = "rocm-hip")]
pub mod hip_embedding;
#[cfg(feature = "rocm-hip")]
pub mod hip_gelu;
#[cfg(feature = "rocm-hip")]
pub mod hip_gelu_bw;
#[cfg(feature = "rocm-hip")]
pub mod hip_gemm_bw;
#[cfg(feature = "rocm-hip")]
pub mod hip_gemm_f16;
#[cfg(feature = "rocm-hip")]
pub mod hip_layernorm;
#[cfg(feature = "rocm-hip")]
pub mod hip_padic;
#[cfg(feature = "rocm-hip")]
pub mod hip_padic_benchmarks;
#[cfg(feature = "rocm-hip")]
pub mod hip_padic_matmul;
#[cfg(feature = "rocm-hip")]
pub mod hip_sheaf;
#[cfg(feature = "rocm-hip")]
pub mod hip_sheaf_overlap_check;
#[cfg(feature = "rocm-hip")]
pub mod hip_softmax;
#[cfg(feature = "rocm-hip")]
pub mod kernel_server;
pub mod memory;
pub mod rocm;

use std::collections::BTreeMap;

use crate::ir::SemanticGraph;
use crate::object::{Representation, Tensor};
use crate::planner::ExecutionPlan;
use crate::{Error, Result};

/// Describes what a backend offers: its name, exactness/determinism flags,
/// and the representations and domains it supports.
///
/// All list fields hold plain strings; see `BackendCapabilities::cpu_scalar`
/// for the values reported by the CPU scalar backend.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BackendCapabilities {
    /// Backend identifier, e.g. `"cpu_scalar"`.
    pub name: String,
    /// Exactness flag (`true` for `cpu_scalar`).
    /// NOTE(review): presumably means results carry no numerical
    /// approximation relative to the reference semantics — confirm.
    pub exact: bool,
    /// Determinism flag (`true` for `cpu_scalar`).
    /// NOTE(review): presumably means repeated runs give identical
    /// results — confirm.
    pub deterministic: bool,
    /// Identifiers of the supported representations, as strings
    /// (`cpu_scalar` fills this from `Representation::dense_cpu().id().0`).
    pub supported_representations: Vec<String>,
    /// Names of the supported domains, e.g. `"integer"`, `"f32"`,
    /// `"padic:fixed_precision"`, `"sheaf:finite_site"`.
    pub supported_domains: Vec<String>,
    /// Semantic degradations reported by the backend; empty for `cpu_scalar`.
    /// NOTE(review): the format of the entries is not visible here — confirm.
    pub semantic_degradations: Vec<String>,
}

impl BackendCapabilities {
    /// Builds the capability descriptor of the CPU scalar backend.
    ///
    /// The descriptor is named `"cpu_scalar"`, is flagged both exact and
    /// deterministic, lists the dense CPU representation as its only
    /// supported representation, and reports no semantic degradations.
    pub fn cpu_scalar() -> Self {
        // Id of the dense CPU representation, the only one advertised here.
        let dense_cpu = Representation::dense_cpu().id().0;

        // Domain names, in the order they are reported.
        let supported_domains = [
            "integer",
            "f32",
            "padic:fixed_precision",
            "sheaf:finite_site",
        ]
        .iter()
        .map(|domain| (*domain).to_owned())
        .collect();

        Self {
            name: String::from("cpu_scalar"),
            exact: true,
            deterministic: true,
            supported_representations: vec![dense_cpu],
            supported_domains,
            semantic_degradations: vec![],
        }
    }
}

/// Wrapper around a single `ExecutionPlan`.
///
/// NOTE(review): presumably the plan after backend lowering; nothing in this
/// file constructs or consumes it — confirm against callers.
#[derive(Debug, Clone, PartialEq)]
pub struct LoweredPlan {
    /// The wrapped execution plan.
    pub plan: ExecutionPlan,
}

/// Result of `Backend::compile`, passed back to `Backend::execute`.
///
/// Carries only a backend name; no compiled code is stored in this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Executable {
    /// Backend name.
    /// NOTE(review): presumably the backend that produced this
    /// executable — confirm.
    pub backend: String,
}

/// Numeric handle passed as an argument to `Backend::execute`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectRef {
    /// Numeric identifier of the referenced object.
    /// NOTE(review): presumably the same id space as `TensorStore`
    /// keys — confirm.
    pub id: usize,
}

/// Collection of tensors keyed by a numeric id.
///
/// Backed by a `BTreeMap`, so entries are kept ordered by id.
#[derive(Debug, Clone, PartialEq)]
pub struct TensorStore<T> {
    tensors: BTreeMap<usize, Tensor<T>>,
}

// Written by hand instead of derived: `#[derive(Default)]` would add a
// `T: Default` bound, although an empty store never needs a `T` value.
impl<T> Default for TensorStore<T> {
    /// Returns an empty store; equivalent to [`TensorStore::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl<T> TensorStore<T> {
    /// Creates an empty store.
    pub fn new() -> Self {
        Self {
            tensors: BTreeMap::new(),
        }
    }

    /// Stores `tensor` under `id`.
    ///
    /// A tensor already stored under the same `id` is replaced; the previous
    /// value is dropped rather than returned.
    pub fn insert(&mut self, id: usize, tensor: Tensor<T>) {
        self.tensors.insert(id, tensor);
    }

    /// Returns the tensor stored under `id`.
    ///
    /// # Errors
    ///
    /// Returns a backend error with the message `missing tensor value {id}`
    /// when no tensor is stored under `id`.
    pub fn get(&self, id: usize) -> Result<&Tensor<T>> {
        self.tensors
            .get(&id)
            .ok_or_else(|| Error::backend(format!("missing tensor value {id}")))
    }

    /// Returns the tensor stored under `id`, or `None` when it is absent.
    pub fn get_optional(&self, id: usize) -> Option<&Tensor<T>> {
        self.tensors.get(&id)
    }

    /// Reports whether a tensor is stored under `id`.
    pub fn contains(&self, id: usize) -> bool {
        self.tensors.contains_key(&id)
    }
}

/// Compile-then-execute interface of a backend.
///
/// NOTE(review): no implementor is visible in this file; the method notes
/// below are derived from the signatures alone.
pub trait Backend {
    /// Returns the backend's static name.
    fn name(&self) -> &'static str;
    /// Returns the capability descriptor of this backend.
    fn capabilities(&self) -> BackendCapabilities;
    /// Turns `plan` into an `Executable`, or returns an error.
    fn compile(&self, plan: &ExecutionPlan) -> Result<Executable>;
    /// Runs `executable` with the objects referenced by `args`.
    ///
    /// Returns `Ok(())` on success; no result value is passed back through
    /// the return type.
    fn execute(&self, executable: &Executable, args: &[ObjectRef]) -> Result<()>;
}

/// Executes a semantic graph under an execution plan against a tensor store
/// with element type `T`.
pub trait GraphExecutor<T> {
    /// Executes `graph` according to `plan`, with mutable access to `store`.
    ///
    /// Returns `Ok(())` on success, so any outputs must reach the caller
    /// through `store`.
    /// NOTE(review): presumably inputs are read from and results written to
    /// `store` by value id — confirm against implementors.
    fn execute_graph(
        &self,
        graph: &SemanticGraph,
        plan: &ExecutionPlan,
        store: &mut TensorStore<T>,
    ) -> Result<()>;
}

/// Executes an execution plan against a tensor store with element type `T`
/// and returns a value of type `R`.
///
/// Unlike `GraphExecutor`, no `SemanticGraph` is passed in.
pub trait SpecializedPlanExecutor<T, R> {
    /// Executes `plan` with mutable access to `store` and returns the
    /// resulting `R`, or an error.
    /// NOTE(review): what `R` represents is chosen by each implementor and
    /// is not visible here — confirm.
    fn execute_specialized_plan(
        &self,
        plan: &ExecutionPlan,
        store: &mut TensorStore<T>,
    ) -> Result<R>;
}