use crate::backend::hardware::{
ComputeHardware, DeviceCapabilities, DeviceKind, HardwareTarget, MemorySpace,
};
use crate::backend::memory::{DeviceBuffer, TransferFallbackReason, TransferPlan, TransferStatus};
use crate::backend::{Backend, BackendCapabilities, Executable, ObjectRef};
use crate::planner::{ExecutionPlan, PlanStepKind};
use crate::{Error, Result};
#[cfg(feature = "accelerated-pilot")]
use crate::backend::TensorStore;
#[cfg(feature = "accelerated-pilot")]
use crate::backend::cpu::CpuScalarBackend;
#[cfg(feature = "accelerated-pilot")]
use crate::domain::DomainId;
#[cfg(feature = "accelerated-pilot")]
use crate::ir::SemanticGraph;
#[cfg(feature = "accelerated-pilot")]
use crate::object::Tensor;
#[cfg(feature = "accelerated-pilot")]
use crate::op::{LoweringCapability, LoweringEvidenceKind, LoweringRule, OperatorRegistry};
#[cfg(feature = "accelerated-pilot")]
use crate::planner::HeuristicPlanner;
/// Unit-struct handle for the `gpu_scaffold` backend.
///
/// The `Backend` implementation in this file never runs work for it: `execute` always returns an
/// error that points callers at the CPU fallback.
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuScaffoldBackend;
/// Unit-struct handle for the `gpu_dense_i64_pilot` backend.
///
/// Only compiled when the `accelerated-pilot` feature is enabled.
#[cfg(feature = "accelerated-pilot")]
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuDenseI64PilotBackend;
/// Identifier passed to `LoweringRule::new` by `GpuDenseI64PilotBackend::lowering_rule`.
#[cfg(feature = "accelerated-pilot")]
pub const GPU_DENSE_I64_PILOT_LOWERING_ID: &str = "gpu_pilot.add.dense_i64";
/// Synchronization model recorded in a [`GpuTransferLifecycle`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GpuSynchronizationModel {
/// The variant used by both contracts constructed in this file.
HostSynchronous,
/// Synchronization tied to `stream` (presumably a stream name or identifier — confirm with
/// whoever constructs this variant; nothing in this file does).
StreamSynchronized { stream: String },
}
/// Flags describing the memory-transfer and verification stages a GPU execution contract declares.
///
/// Both contracts built in this file set every transfer flag to `false`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GpuTransferLifecycle {
// Whether the contract declares device-memory allocation.
pub allocates_device_memory: bool,
// Whether the contract declares a host-to-device copy.
pub host_to_device_copy: bool,
// Whether the contract declares a device-to-host copy.
pub device_to_host_copy: bool,
// Declared synchronization model.
pub synchronization: GpuSynchronizationModel,
// Whether results are checked against a CPU oracle (`true` only for the dense i64 pilot
// contract in this file).
pub cpu_oracle_verification: bool,
}
/// One kernel listed in a [`GpuExecutionContract`].
///
/// All fields are free-form strings; the example values below are the ones used by the dense i64
/// pilot contract in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GpuKernelRegistryEntry {
// Operator name, e.g. "add".
pub op_name: String,
// Kernel symbol, e.g. "host_vector_dense_i64_add".
pub kernel_symbol: String,
// Scalar element type, e.g. "i64".
pub scalar_type: String,
// Mathematical domain the kernel accepts, e.g. "integer".
pub supported_domain: String,
// Representation id string; the pilot uses the id of `Representation::dense_cpu()`.
pub supported_representation: String,
// Label describing the kernel source, e.g. "host-vector-rust".
pub source_fingerprint: String,
}
/// Declarative description of what a GPU-flavoured backend does and does not claim.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GpuExecutionContract {
// Backend name, e.g. "gpu_scaffold".
pub backend: String,
// Hardware target reported by the backend's `target()`.
pub target: HardwareTarget,
// Human-readable scope statement.
pub scope: String,
// Whether work runs on a real device; `false` for both contracts built in this file.
pub real_device_execution: bool,
// Transfer / synchronization / verification flags.
pub lifecycle: GpuTransferLifecycle,
// Kernels the backend lists; empty for the scaffold contract.
pub kernels: Vec<GpuKernelRegistryEntry>,
// Statements supporting the contract.
pub evidence: Vec<String>,
// Statements of what the backend explicitly does not claim.
pub non_claims: Vec<String>,
}
impl GpuExecutionContract {
    /// Contract for the `gpu_scaffold` backend: no kernels, no transfers, no device execution,
    /// and no CPU-oracle verification.
    pub fn scaffold_fallback() -> Self {
        let lifecycle = GpuTransferLifecycle {
            allocates_device_memory: false,
            host_to_device_copy: false,
            device_to_host_copy: false,
            synchronization: GpuSynchronizationModel::HostSynchronous,
            cpu_oracle_verification: false,
        };
        let evidence = [
            "gpu_scaffold intentionally has no executable kernels",
            "all executable work must fall back to cpu_scalar",
        ];
        let non_claims = ["not real GPU execution", "not generic GPU support"];

        Self {
            backend: String::from("gpu_scaffold"),
            target: GpuScaffoldBackend::target(),
            scope: String::from("fallback-only GPU planning scaffold"),
            real_device_execution: false,
            lifecycle,
            kernels: Vec::new(),
            evidence: evidence.iter().copied().map(String::from).collect(),
            non_claims: non_claims.iter().copied().map(String::from).collect(),
        }
    }

    /// Contract for the feature-gated dense i64 add pilot: a single host-vector `add` kernel,
    /// no device transfers, and CPU-oracle verification enabled.
    #[cfg(feature = "accelerated-pilot")]
    pub fn dense_i64_host_vector_pilot() -> Self {
        let lifecycle = GpuTransferLifecycle {
            allocates_device_memory: false,
            host_to_device_copy: false,
            device_to_host_copy: false,
            synchronization: GpuSynchronizationModel::HostSynchronous,
            cpu_oracle_verification: true,
        };
        let host_vector_add = GpuKernelRegistryEntry {
            op_name: String::from("add"),
            kernel_symbol: String::from("host_vector_dense_i64_add"),
            scalar_type: String::from("i64"),
            supported_domain: String::from("integer"),
            supported_representation: crate::object::Representation::dense_cpu().id().0,
            source_fingerprint: String::from("host-vector-rust"),
        };
        let evidence = [
            "selected through public prefer-gpu planning only under accelerated-pilot",
            "outputs must match CpuScalarBackend exactly",
        ];
        let non_claims = [
            "not real device allocation",
            "not production GPU acceleration",
            "not generic GPU support",
        ];

        Self {
            backend: String::from("gpu_dense_i64_pilot"),
            target: GpuDenseI64PilotBackend::target(),
            scope: String::from("feature-gated dense i64 add host-vector pilot"),
            real_device_execution: false,
            lifecycle,
            kernels: vec![host_vector_add],
            evidence: evidence.iter().copied().map(String::from).collect(),
            non_claims: non_claims.iter().copied().map(String::from).collect(),
        }
    }
}
/// Report explaining why a GPU backend rejected a plan or transfer and where to fall back.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GpuUnsupportedReport {
// Name of the backend that rejected the work.
pub backend: String,
// Structured rejection reason.
pub reason: GpuUnsupportedReason,
// Transfer-level reason; only populated by `unsupported_transfer_report` in this file.
pub transfer_reason: Option<TransferFallbackReason>,
// Backend to fall back to; both constructors in this file set "cpu_scalar".
pub fallback_backend: String,
// Supporting statements, ending with the reason's message.
pub evidence: Vec<String>,
}
/// Why a GPU backend declined to handle a plan step or a transfer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GpuUnsupportedReason {
    /// The step's mathematical domain is not one the backend handles.
    NonstandardDomain { domain: String },
    /// The step needs finite-site sheaf locality.
    SheafLocality { op_name: String },
    /// The step needs fixed-precision p-adic execution.
    PadicPrecision { domain: String },
    /// A device transfer was rejected; `message` is the full, caller-supplied text.
    DeviceTransfer { message: String },
    /// No executable kernel exists for the operator.
    NoKernel { op_name: String },
}

impl GpuUnsupportedReason {
    /// Renders the reason as a human-readable sentence.
    ///
    /// Every variant except `DeviceTransfer` produces text that names `gpu_scaffold`;
    /// `DeviceTransfer` returns its stored message verbatim.
    pub fn message(&self) -> String {
        match self {
            // Caller-supplied text is passed through unchanged.
            Self::DeviceTransfer { message: text } => text.to_owned(),
            Self::NoKernel { op_name: op } => {
                format!("gpu_scaffold has no executable kernel for {op}")
            }
            Self::SheafLocality { op_name: op } => {
                format!("gpu_scaffold does not support finite-site sheaf locality for {op}")
            }
            Self::PadicPrecision { domain: dom } => {
                format!("gpu_scaffold does not support fixed-precision p-adic execution for {dom}")
            }
            Self::NonstandardDomain { domain: dom } => {
                format!("gpu_scaffold does not support mathematical domain {dom}")
            }
        }
    }
}
impl GpuUnsupportedReport {
    /// Builds a report attributed to `gpu_scaffold` with `cpu_scalar` as the fallback.
    ///
    /// The evidence list is the P183 scaffold disclaimer followed by the reason's message.
    fn new(reason: GpuUnsupportedReason, transfer_reason: Option<TransferFallbackReason>) -> Self {
        let disclaimer =
            "P183 GPU support is scaffold/fallback only; no optimized kernels are claimed";
        let evidence = vec![String::from(disclaimer), reason.message()];
        Self {
            backend: String::from("gpu_scaffold"),
            reason,
            transfer_reason,
            fallback_backend: String::from("cpu_scalar"),
            evidence,
        }
    }

    /// Builds a report attributed to `backend`, with no transfer reason and `cpu_scalar` as the
    /// fallback.
    ///
    /// NOTE(review): the second evidence entry comes from `GpuUnsupportedReason::message`, whose
    /// text names `gpu_scaffold` regardless of the `backend` passed here.
    pub fn new_for_real_backend(backend: &str, reason: GpuUnsupportedReason) -> Self {
        let rejection = format!("{backend} rejected unsupported public GPU plan");
        let evidence = vec![rejection, reason.message()];
        Self {
            backend: backend.to_owned(),
            reason,
            transfer_reason: None,
            fallback_backend: String::from("cpu_scalar"),
            evidence,
        }
    }
}
impl GpuScaffoldBackend {
    /// Hardware target descriptor for the scaffold: a GPU-kind device in device memory space.
    pub fn target() -> HardwareTarget {
        HardwareTarget {
            id: "gpu_scaffold".to_string(),
            kind: DeviceKind::Gpu,
            memory_space: MemorySpace::Device,
        }
    }

    /// Capabilities advertised by the scaffold.
    ///
    /// It is declared neither exact nor deterministic, and its semantic degradations state that
    /// no kernel is executed.
    pub fn capabilities() -> BackendCapabilities {
        BackendCapabilities {
            name: "gpu_scaffold".to_string(),
            exact: false,
            deterministic: false,
            supported_representations: vec![crate::object::Representation::dense_cpu().id().0],
            supported_domains: vec!["integer".to_string()],
            semantic_degradations: vec![
                "scaffold_only:no_kernel_execution".to_string(),
                "unsupported:padic:fixed_precision".to_string(),
                "unsupported:sheaf:finite_site".to_string(),
            ],
        }
    }

    /// Explains why the scaffold cannot run `plan`.
    ///
    /// Returns `None` only for a plan with no steps. Otherwise the report is derived from the
    /// **first** step alone (later steps are never inspected), using the most specific reason
    /// that applies, in this order: p-adic precision, sheaf locality, nonstandard domain, and
    /// finally "no kernel" — the scaffold has no kernels, so every non-empty plan is rejected.
    pub fn unsupported_plan_report(&self, plan: &ExecutionPlan) -> Option<GpuUnsupportedReport> {
        let step = plan.steps.first()?;

        let is_padic = step.domain.starts_with("Q_") || step.domain.contains("padic");
        let is_sheaf = matches!(step.kind, PlanStepKind::CoverGlueCheck { .. })
            || step.domain.starts_with("cover:");
        let is_nonstandard = step.domain != "integer" && step.domain != "unknown";

        let reason = if is_padic {
            GpuUnsupportedReason::PadicPrecision {
                domain: step.domain.clone(),
            }
        } else if is_sheaf {
            GpuUnsupportedReason::SheafLocality {
                op_name: step.op_name.clone(),
            }
        } else if is_nonstandard {
            GpuUnsupportedReason::NonstandardDomain {
                domain: step.domain.clone(),
            }
        } else {
            GpuUnsupportedReason::NoKernel {
                op_name: step.op_name.clone(),
            }
        };
        Some(GpuUnsupportedReport::new(reason, None))
    }

    /// Plans a transfer from `source` to `destination` and reports it if it is unsupported.
    ///
    /// Returns `None` when the planned transfer is supported or a no-op; otherwise a report whose
    /// `transfer_reason` carries the planner's fallback reason.
    pub fn unsupported_transfer_report(
        &self,
        source: DeviceBuffer,
        destination: DeviceBuffer,
    ) -> Option<GpuUnsupportedReport> {
        let transfer = TransferPlan::plan(source, destination);
        match transfer.status {
            TransferStatus::Supported | TransferStatus::NoOp => None,
            TransferStatus::Unsupported(reason) => Some(GpuUnsupportedReport::new(
                GpuUnsupportedReason::DeviceTransfer {
                    message: format!("gpu_scaffold rejected transfer: {reason:?}"),
                },
                Some(reason),
            )),
        }
    }
}
impl Backend for GpuScaffoldBackend {
fn name(&self) -> &'static str {
"gpu_scaffold"
}
fn capabilities(&self) -> BackendCapabilities {
Self::capabilities()
}
fn compile(&self, plan: &ExecutionPlan) -> Result<Executable> {
if let Some(report) = self.unsupported_plan_report(plan) {
return Err(Error::backend(report.reason.message()));
}
Ok(Executable {
backend: self.name().to_string(),
})
}
fn execute(&self, _executable: &Executable, _args: &[ObjectRef]) -> Result<()> {
Err(Error::backend(
"gpu_scaffold has no runtime execution kernels; use CPU fallback",
))
}
}
impl ComputeHardware for GpuScaffoldBackend {
fn target(&self) -> HardwareTarget {
Self::target()
}
fn device_capabilities(&self) -> DeviceCapabilities {
DeviceCapabilities::from_backend(Self::target(), Self::capabilities())
}
}
/// Outcome of one `execute_i64_add_with_cpu_oracle` run.
#[cfg(feature = "accelerated-pilot")]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GpuDenseI64PilotReport {
// Name of the backend that produced the report.
pub backend: String,
// Operator that was executed; always "add" in this file.
pub op_name: String,
// Output ids the pilot wrote, in plan-step order.
pub checked_outputs: Vec<usize>,
// Whether every checked output equalled the CPU oracle's output.
pub cpu_oracle_matches: bool,
// Runtime measurement; this file always sets it to `None`.
pub preliminary_runtime_ns: Option<u64>,
// Supporting statements about the run.
pub evidence: Vec<String>,
}
#[cfg(feature = "accelerated-pilot")]
impl GpuDenseI64PilotBackend {
    /// Hardware target descriptor for the pilot: a GPU-kind device in device memory space.
    pub fn target() -> HardwareTarget {
        HardwareTarget {
            id: "gpu_dense_i64_pilot".to_string(),
            kind: DeviceKind::Gpu,
            memory_space: MemorySpace::Device,
        }
    }

    /// Capabilities advertised by the pilot: exact and deterministic, integer domain only, with
    /// degradations recording that it is a feature-gated host-vector kernel.
    pub fn capabilities() -> BackendCapabilities {
        BackendCapabilities {
            name: "gpu_dense_i64_pilot".to_string(),
            exact: true,
            deterministic: true,
            supported_representations: vec![crate::object::Representation::dense_cpu().id().0],
            supported_domains: vec!["integer".to_string()],
            semantic_degradations: vec![
                "pilot:feature_gated_dense_i64_add_only".to_string(),
                "pilot:host_vector_kernel_no_device_allocator".to_string(),
                "unsupported:padic:fixed_precision".to_string(),
                "unsupported:sheaf:finite_site".to_string(),
            ],
        }
    }

    /// Lowering rule that maps `add` onto this backend for the dense CPU representation in the
    /// integer domain, together with its evidence requirement and obligations.
    pub fn lowering_rule() -> LoweringRule {
        LoweringRule::new(
            GPU_DENSE_I64_PILOT_LOWERING_ID,
            "add",
            "gpu_dense_i64_pilot",
            vec![crate::object::Representation::dense_cpu().id().0],
        )
        .with_supported_domain("integer")
        .with_capability(LoweringCapability::dense_integer())
        .with_required_evidence(
            LoweringEvidenceKind::ExactnessPreserved,
            "feature-gated dense i64 pilot preserves integer addition after CPU oracle comparison",
        )
        .with_obligation(
            "inputs must be dense i64 tensors with identical shape",
            "the pilot executes host-vector elementwise addition and checks CpuScalarBackend output",
        )
        .with_obligation(
            "performance claims remain disabled for the host-vector pilot",
            "the accelerated-pilot path has no device allocator, stream, or external GPU dependency",
        )
    }

    /// Executes every step of `plan` as a dense i64 add and compares the results with a CPU
    /// oracle run.
    ///
    /// The oracle runs first, on a clone of `store` taken before the pilot writes anything, using
    /// a CPU plan derived from the whole `graph`. The pilot then writes each step's result into
    /// `store`. Candidate outputs stay in `store` even when the comparison fails; the mismatch is
    /// reported through `cpu_oracle_matches == false`, not through an `Err`.
    ///
    /// # Errors
    ///
    /// Returns a backend error when `plan` is not a supported dense i64 add plan, when CPU
    /// planning or execution fails, when a step references an unknown node, when a node lacks the
    /// two inputs or the output an add needs, when a tensor is missing from a store, or when the
    /// element-wise add itself fails.
    pub fn execute_i64_add_with_cpu_oracle(
        &self,
        graph: &SemanticGraph,
        plan: &ExecutionPlan,
        store: &mut TensorStore<i64>,
    ) -> Result<GpuDenseI64PilotReport> {
        self.ensure_supported_i64_add_plan(plan)?;

        // Snapshot the inputs before the pilot mutates `store`, then run the oracle on the copy.
        let mut oracle_store = store.clone();
        let cpu_plan = HeuristicPlanner::new(BackendCapabilities::cpu_scalar()).plan(graph)?;
        CpuScalarBackend.execute_i64(graph, &cpu_plan, &mut oracle_store)?;

        let mut checked_outputs = Vec::new();
        for step in &plan.steps {
            let node = graph
                .nodes()
                .get(step.node_id)
                .ok_or_else(|| Error::backend(format!("unknown node {}", step.node_id)))?;

            // The plan and the graph arrive separately, so the node's arity is validated here
            // instead of trusting unchecked indexing (which would panic on a malformed node).
            let missing = |what: &str| {
                Error::backend(format!(
                    "gpu_dense_i64_pilot add node {} is missing {what}",
                    step.node_id
                ))
            };
            let lhs_id = node
                .inputs
                .first()
                .copied()
                .ok_or_else(|| missing("its first input"))?;
            let rhs_id = node
                .inputs
                .get(1)
                .copied()
                .ok_or_else(|| missing("its second input"))?;
            let output_id = node
                .output_ids
                .first()
                .copied()
                .ok_or_else(|| missing("an output id"))?;

            // Both operands are only read, so shared borrows suffice; they end before `insert`.
            let output = dense_i64_add_tensor(store.get(lhs_id)?, store.get(rhs_id)?)?;
            store.insert(output_id, output);
            checked_outputs.push(output_id);
        }

        // Stop at the first output that differs from the oracle.
        let mut cpu_oracle_matches = true;
        for output_id in &checked_outputs {
            if store.get(*output_id)? != oracle_store.get(*output_id)? {
                cpu_oracle_matches = false;
                break;
            }
        }

        let evidence = if cpu_oracle_matches {
            vec![
                "P187 accelerated pilot is feature-gated and scoped to dense i64 add".to_string(),
                "candidate output matched CpuScalarBackend oracle exactly".to_string(),
                "performance is preliminary: no device allocator, stream, or external GPU dependency is used"
                    .to_string(),
            ]
        } else {
            vec!["P187 dense i64 pilot executed but failed CPU oracle comparison".to_string()]
        };

        Ok(GpuDenseI64PilotReport {
            backend: self.name().to_string(),
            op_name: "add".to_string(),
            checked_outputs,
            cpu_oracle_matches,
            preliminary_runtime_ns: None,
            evidence,
        })
    }

    /// Validates that `plan` targets this backend, is non-empty, and consists solely of single
    /// `add` steps in the `integer` domain.
    ///
    /// P-adic and sheaf-locality steps are rejected with their own messages before the generic
    /// "only supports single dense integer add steps" rejection.
    fn ensure_supported_i64_add_plan(&self, plan: &ExecutionPlan) -> Result<()> {
        if plan.backend != self.name() {
            return Err(Error::backend(format!(
                "plan targets backend {}, but executor is {}",
                plan.backend,
                self.name()
            )));
        }
        if plan.steps.is_empty() {
            return Err(Error::backend(
                "gpu_dense_i64_pilot requires at least one dense i64 add step",
            ));
        }
        for step in &plan.steps {
            if step.domain.starts_with("Q_") || step.domain.contains("padic") {
                return Err(Error::backend(format!(
                    "gpu_dense_i64_pilot does not support fixed-precision p-adic execution for {}",
                    step.domain
                )));
            }
            if matches!(step.kind, PlanStepKind::CoverGlueCheck { .. })
                || step.domain.starts_with("cover:")
            {
                return Err(Error::backend(format!(
                    "gpu_dense_i64_pilot does not support finite-site sheaf locality for {}",
                    step.op_name
                )));
            }
            if !matches!(step.kind, PlanStepKind::Single)
                || step.op_name != "add"
                || step.domain != "integer"
            {
                return Err(Error::backend(format!(
                    "gpu_dense_i64_pilot only supports single dense integer add steps, got op={} domain={}",
                    step.op_name, step.domain
                )));
            }
        }
        Ok(())
    }
}
/// Registers the dense i64 pilot's lowering rule with `registry`, returning whatever
/// `register_lowering` returns.
#[cfg(feature = "accelerated-pilot")]
pub fn register_gpu_dense_i64_pilot_lowering(registry: &mut OperatorRegistry) -> Result<()> {
    let pilot_rule = GpuDenseI64PilotBackend::lowering_rule();
    registry.register_lowering(pilot_rule)
}
#[cfg(feature = "accelerated-pilot")]
impl Backend for GpuDenseI64PilotBackend {
    /// Stable backend identifier.
    fn name(&self) -> &'static str {
        "gpu_dense_i64_pilot"
    }

    /// Forwards to the inherent, receiver-less `capabilities()`.
    fn capabilities(&self) -> BackendCapabilities {
        GpuDenseI64PilotBackend::capabilities()
    }

    /// Validates `plan` and, when it is a supported dense i64 add plan, yields an `Executable`
    /// tagged with this backend's name.
    fn compile(&self, plan: &ExecutionPlan) -> Result<Executable> {
        self.ensure_supported_i64_add_plan(plan)
            .map(|()| Executable {
                backend: self.name().to_string(),
            })
    }

    /// Always fails: callers must go through `execute_i64_add_with_cpu_oracle` instead.
    fn execute(&self, _executable: &Executable, _args: &[ObjectRef]) -> Result<()> {
        let message =
            "gpu_dense_i64_pilot requires execute_i64_add_with_cpu_oracle for semantic guardrails";
        Err(Error::backend(message))
    }
}
#[cfg(feature = "accelerated-pilot")]
impl ComputeHardware for GpuDenseI64PilotBackend {
    /// Forwards to the inherent, receiver-less `target()`.
    fn target(&self) -> HardwareTarget {
        GpuDenseI64PilotBackend::target()
    }

    /// Derives device capabilities from the pilot's target and backend capabilities.
    fn device_capabilities(&self) -> DeviceCapabilities {
        let hardware_target = GpuDenseI64PilotBackend::target();
        let backend_capabilities = GpuDenseI64PilotBackend::capabilities();
        DeviceCapabilities::from_backend(hardware_target, backend_capabilities)
    }
}
/// Element-wise sum of two integer-domain i64 tensors.
///
/// The result reuses `lhs`'s metadata.
///
/// # Errors
///
/// Returns a backend error when either tensor is not in the `integer` domain, when the shapes or
/// data lengths differ, or when any element sum overflows `i64`. Overflow is reported rather than
/// wrapped (release) or panicked on (debug) because this backend advertises exact results.
#[cfg(feature = "accelerated-pilot")]
fn dense_i64_add_tensor(lhs: &Tensor<i64>, rhs: &Tensor<i64>) -> Result<Tensor<i64>> {
    let integer = DomainId::new("integer");
    if lhs.meta.domain != integer || rhs.meta.domain != integer {
        return Err(Error::backend(
            "gpu_dense_i64_pilot only supports integer-domain tensors",
        ));
    }
    if lhs.meta.shape != rhs.meta.shape {
        return Err(Error::backend(format!(
            "gpu_dense_i64_pilot add shape mismatch: lhs={:?}, rhs={:?}",
            lhs.meta.shape, rhs.meta.shape
        )));
    }
    // Guards the zip below, which would otherwise silently truncate to the shorter operand.
    if lhs.data.len() != rhs.data.len() {
        return Err(Error::backend(format!(
            "gpu_dense_i64_pilot add length mismatch: lhs={}, rhs={}",
            lhs.data.len(),
            rhs.data.len()
        )));
    }
    Ok(Tensor {
        meta: lhs.meta.clone(),
        data: lhs
            .data
            .iter()
            .zip(rhs.data.iter())
            .enumerate()
            .map(|(index, (a, b))| {
                a.checked_add(*b).ok_or_else(|| {
                    Error::backend(format!(
                        "gpu_dense_i64_pilot add overflowed i64 at element {index}: {a} + {b}"
                    ))
                })
            })
            .collect::<Result<_>>()?,
    })
}