use super::CompiledScanner;
/// Version tag mixed into every digest produced by `fused_cache_key`.
/// Changing this value changes the key derived from identical program bytes.
const FUSED_CACHE_VERSION: u32 = 1;
impl CompiledScanner {
    /// Returns the fused GPU program, building it on first use.
    ///
    /// The program from `ac_gpu_program()` and, when `rule_pipeline()` yields
    /// one, the pipeline's program are passed to
    /// `vyre_libs::scan::fuse_programs`. The outcome is stored through
    /// `self.fused_program.get_or_init`, so the closure's result (including
    /// `None`) is what later calls observe.
    ///
    /// Returns `None` when there is no AC GPU program, when fewer than two
    /// programs are eligible, or when fusion fails; in the last case callers
    /// are expected to fall back to sequential dispatch.
    pub fn fused_program(&self) -> Option<&vyre::Program> {
        self.fused_program
            .get_or_init(|| {
                let ac_program = self.ac_gpu_program()?;
                let mut programs: Vec<&vyre::Program> = vec![ac_program];
                if let Some(pipeline) = self.rule_pipeline() {
                    programs.push(&pipeline.program);
                }
                // Fusing a single program would gain nothing.
                if programs.len() < 2 {
                    tracing::debug!(
                        target: "keyhog::gpu",
                        programs = programs.len(),
                        "program fusion skipped - fewer than 2 eligible programs"
                    );
                    return None;
                }
                let started = std::time::Instant::now();
                // `fuse_programs` is given owned programs, so clone out of the borrowed list.
                let owned: Vec<vyre::Program> = programs.iter().map(|&p| p.clone()).collect();
                match vyre_libs::scan::fuse_programs(&owned) {
                    Ok(fused) => {
                        let elapsed_ms = started.elapsed().as_millis();
                        tracing::info!(
                            target: "keyhog::gpu",
                            input_programs = programs.len(),
                            fused_buffers = fused.buffers().len(),
                            fused_workgroup = ?fused.workgroup_size(),
                            elapsed_ms,
                            "program fusion succeeded - single GPU dispatch active"
                        );
                        self.cache_fused_program(&fused, &programs);
                        Some(fused)
                    }
                    Err(error) => {
                        tracing::debug!(
                            target: "keyhog::gpu",
                            input_programs = programs.len(),
                            error = %error,
                            "program fusion failed - falling back to sequential dispatch. \
                             Common causes: incompatible buffer layouts, over-dispatch geometry, \
                             or self-aliasing constraints."
                        );
                        None
                    }
                }
            })
            .as_ref()
    }

    /// Best-effort write of a fused program's serialized bytes to the GPU cache.
    ///
    /// The bytes are written to a sibling temp file whose extension carries the
    /// current process id, then renamed onto the final path, so the final path
    /// only ever receives a fully written file. Every failure is logged at
    /// debug level (or silently skipped when no cache directory / path is
    /// available) and never propagated: caching must not affect scanning.
    ///
    /// `_programs` is currently unused.
    fn cache_fused_program(&self, fused: &vyre::Program, _programs: &[&vyre::Program]) {
        let Some(cache_dir) = super::gpu_cache::gpu_matcher_cache_dir() else {
            return;
        };
        let cache_key = format!("fused-{}", fused_cache_key(fused));
        let Some(path) = vyre_libs::scan::engine_cache_path(&cache_dir, &cache_key) else {
            return;
        };
        let bytes = fused.to_bytes();
        let tmp = path.with_extension(format!("tmp.{}", std::process::id()));
        if let Some(parent) = path.parent() {
            // A failure here resurfaces as a write error below, where it is logged.
            let _ = std::fs::create_dir_all(parent);
        }
        if let Err(error) = std::fs::write(&tmp, &bytes) {
            tracing::debug!(
                target: "keyhog::gpu",
                error = %error,
                path = %tmp.display(),
                "fused program cache write failed"
            );
            // The failed write may have created or partially filled the temp file;
            // do not leave it behind in the cache directory.
            let _ = std::fs::remove_file(&tmp);
            return;
        }
        if let Err(error) = std::fs::rename(&tmp, &path) {
            tracing::debug!(
                target: "keyhog::gpu",
                error = %error,
                path = %path.display(),
                "fused program cache rename failed"
            );
            let _ = std::fs::remove_file(&tmp);
        }
    }
}
/// Derives the cache key for a fused program.
///
/// The key is the lowercase hex SHA-256 of, in order: `FUSED_CACHE_VERSION`
/// (little-endian), the serialized program length as a little-endian `u64`,
/// and the serialized program bytes.
fn fused_cache_key(program: &vyre::Program) -> String {
    use sha2::{Digest, Sha256};
    use std::fmt::Write as _;

    let serialized = program.to_bytes();

    let mut hasher = Sha256::new();
    hasher.update(FUSED_CACHE_VERSION.to_le_bytes());
    hasher.update((serialized.len() as u64).to_le_bytes());
    hasher.update(&serialized);

    // Two hex characters per digest byte.
    hasher
        .finalize()
        .iter()
        .fold(String::with_capacity(64), |mut out, b| {
            let _ = write!(out, "{b:02x}");
            out
        })
}
/// Version tag mixed into every digest produced by `fusion_cache_key`.
/// Changing this value changes the key derived from identical program inputs.
pub const FUSION_CACHE_VERSION: u32 = 1;
/// Fuses `programs` into a single program.
///
/// # Errors
///
/// Returns an error string when `programs` is empty, or the stringified
/// error from `vyre_libs::scan::fuse_programs` when fusion fails.
pub fn try_fuse(programs: &[&vyre::Program]) -> std::result::Result<vyre::Program, String> {
    if programs.is_empty() {
        return Err(String::from("Cannot fuse empty program list"));
    }

    // The fuser is handed owned programs, so clone out of the borrowed slice.
    let owned: Vec<vyre::Program> = programs.iter().copied().cloned().collect();

    match vyre_libs::scan::fuse_programs(&owned) {
        Ok(fused) => Ok(fused),
        Err(err) => Err(err.to_string()),
    }
}
/// Attempts fusion via `try_fuse`, discarding the error.
///
/// Returns `Some(program)` on success and `None` on any failure, leaving the
/// choice of fallback to the caller.
pub fn fuse_or_fallback(programs: &[&vyre::Program]) -> Option<vyre::Program> {
    match try_fuse(programs) {
        Ok(fused) => Some(fused),
        Err(_) => None,
    }
}
/// Derives a cache key from an ordered list of input programs.
///
/// The key is the lowercase hex SHA-256 of `FUSION_CACHE_VERSION`
/// (little-endian) followed, for each program in order, by its serialized
/// length as a little-endian `u64` and its serialized bytes.
pub fn fusion_cache_key(programs: &[&vyre::Program]) -> String {
    use sha2::{Digest, Sha256};

    let mut hasher = Sha256::new();
    hasher.update(FUSION_CACHE_VERSION.to_le_bytes());

    programs.iter().for_each(|program| {
        let serialized = program.to_bytes();
        // Length prefix keeps adjacent programs' bytes from running together.
        hasher.update((serialized.len() as u64).to_le_bytes());
        hasher.update(&serialized);
    });

    hasher
        .finalize()
        .iter()
        .map(|b| format!("{b:02x}"))
        .collect::<String>()
}