use super::*;
use smallvec::SmallVec;
/// With the slot list `[0, 1, 2]` and the output slot `[2]`, the mapping is expected to be `[2]`.
///
/// NOTE(review): the third argument (`3`) looks like a buffer count used for range checking —
/// confirm against `output_indices_from_program_metadata`.
#[test]
fn test_output_indices_from_program_metadata_basic() {
    let mapped = output_indices_from_program_metadata(&[0, 1, 2], &[2], 3)
        .expect("metadata mapping should succeed");
    assert_eq!(mapped, vec![2]);
}
/// With the non-contiguous slot list `[2, 4, 7]`, output slots `[4, 7]` are expected to map to
/// `[1, 2]`, i.e. to their positions in the first list rather than to the raw slot numbers.
#[test]
fn test_output_indices_from_program_metadata_sparse_slots() {
    let mapping = output_indices_from_program_metadata(&[2, 4, 7], &[4, 7], 3);
    let positions = mapping.expect("metadata mapping should succeed");
    assert_eq!(positions, vec![1, 2]);
}
/// Passing an empty output-slot list must produce an error mentioning `ProgramSpec.outs is empty`.
#[test]
fn test_output_indices_from_program_metadata_rejects_empty_outs() {
    let failure = output_indices_from_program_metadata(&[0, 1], &[], 2).expect_err("empty outs should fail");
    let message = failure.to_string();
    assert!(message.contains("ProgramSpec.outs is empty"));
}
/// An output slot (`1`) that is absent from the slot list (`[0, 2]`) must be rejected with an
/// error mentioning `not found in ProgramSpec.globals`.
#[test]
fn test_output_indices_from_program_metadata_rejects_unknown_slot() {
    let failure =
        output_indices_from_program_metadata(&[0, 2], &[1], 2).expect_err("unknown outs slot should fail");
    let message = failure.to_string();
    assert!(message.contains("not found in ProgramSpec.globals"));
}
/// Output slot `1` in the slot list `[0, 1]` combined with a third argument of `1` must be
/// rejected with an error mentioning `out of range`.
#[test]
fn test_output_indices_from_program_metadata_rejects_out_of_range_position() {
    let failure =
        output_indices_from_program_metadata(&[0, 1], &[1], 1).expect_err("mapped output index out of range");
    let message = failure.to_string();
    assert!(message.contains("out of range"));
}
/// Resolves buffer indices for a two-parameter kernel with the globals list `[0, 1]`.
///
/// The call passes its arguments as `(param_b, param_a)` and `buffer_uop_ids` mirrors that order;
/// with the id map `param_a -> 10`, `param_b -> 11` the expected result is `[11, 10]`.
#[test]
fn test_resolve_compiled_kernel_buffer_indices_reorders_by_program_globals() {
    use std::collections::{HashMap, HashSet};

    let param_a = UOp::param(0, 4, svod_dtype::DType::Float32, None);
    let param_b = UOp::param(1, 4, svod_dtype::DType::Float32, None);
    let uop_id_to_idx = HashMap::from([(param_b.id, 11), (param_a.id, 10)]);

    let sink = UOp::sink(vec![param_a.clone(), param_b.clone()]);
    let kernel = sink.call(SmallVec::from_vec(vec![param_b.clone(), param_a.clone()]), svod_ir::CallInfo::default());
    let item = crate::schedule::ScheduleItem {
        kernel,
        ast: sink,
        buffers: vec![],
        buffer_uop_ids: vec![param_b.id, param_a.id],
        fixedvars: HashMap::new(),
        dependencies: vec![],
        instance_dependencies: vec![],
        alias_registered_ids: vec![],
        loop_var_names: HashSet::new(),
    };

    let resolved = resolve_compiled_kernel_buffer_indices(&item, &uop_id_to_idx, &[0, 1])
        .expect("compiled buffer ABI ordering");
    assert_eq!(resolved, vec![11, 10]);
}
/// Resolves buffer indices for a two-parameter kernel with the globals list `[1, 0]`.
///
/// `buffer_uop_ids` is `[param_b, param_a]` and the id map is `param_a -> 10`, `param_b -> 11`;
/// the expected result `[10, 11]` corresponds to picking position 1, then position 0, of
/// `buffer_uop_ids`.
#[test]
fn test_resolve_compiled_kernel_buffer_indices_treats_globals_as_buffer_positions() {
    use std::collections::{HashMap, HashSet};

    let param_a = UOp::param(0, 4, svod_dtype::DType::Float32, None);
    let param_b = UOp::param(1, 4, svod_dtype::DType::Float32, None);
    let uop_id_to_idx = HashMap::from([(param_b.id, 11), (param_a.id, 10)]);

    let sink = UOp::sink(vec![param_a.clone(), param_b.clone()]);
    let kernel = sink.call(SmallVec::from_vec(vec![param_b.clone(), param_a.clone()]), svod_ir::CallInfo::default());
    let item = crate::schedule::ScheduleItem {
        kernel,
        ast: sink,
        buffers: vec![],
        buffer_uop_ids: vec![param_b.id, param_a.id],
        fixedvars: HashMap::new(),
        dependencies: vec![],
        instance_dependencies: vec![],
        alias_registered_ids: vec![],
        loop_var_names: HashSet::new(),
    };

    let resolved = resolve_compiled_kernel_buffer_indices(&item, &uop_id_to_idx, &[1, 0])
        .expect("compiled buffer ABI ordering");
    assert_eq!(resolved, vec![10, 11]);
}
/// A globals entry of `1` for a kernel that has a single buffer id must be rejected with an
/// error mentioning `out of range`.
#[test]
fn test_resolve_compiled_kernel_buffer_indices_rejects_out_of_range_global_position() {
    use std::collections::{HashMap, HashSet};

    let only_param = UOp::param(0, 4, svod_dtype::DType::Float32, None);
    let uop_id_to_idx = HashMap::from([(only_param.id, 10)]);

    let sink = UOp::sink(vec![only_param.clone()]);
    let kernel = sink.call(SmallVec::from_vec(vec![only_param.clone()]), svod_ir::CallInfo::default());
    let item = crate::schedule::ScheduleItem {
        kernel,
        ast: sink,
        buffers: vec![],
        buffer_uop_ids: vec![only_param.id],
        fixedvars: HashMap::new(),
        dependencies: vec![],
        instance_dependencies: vec![],
        alias_registered_ids: vec![],
        loop_var_names: HashSet::new(),
    };

    let err = resolve_compiled_kernel_buffer_indices(&item, &uop_id_to_idx, &[1])
        .expect_err("out-of-range global position should fail");
    let message = err.to_string();
    assert!(message.contains("out of range"), "unexpected error: {err}");
}
/// Builds a cached pre-schedule from a normalized add graph and checks what
/// `restore_post_schedule_pre_schedule` does to it.
///
/// Expectations, in order:
/// 1. the cached pre-schedule contains at least one `Op::Param` source carrying a device;
/// 2. the restored pre-schedule contains no such source;
/// 3. the restored output buffer UOps contain no such `Op::Param`;
/// 4. the restored sources contain no `Op::LUnique`;
/// 5. the cached pre-schedule still contains its `Op::Param` sources afterwards.
#[test]
fn test_restore_post_schedule_pre_schedule_rewrites_runtime_buf_uops() {
    crate::test::helpers::test_setup();

    let sum = &Tensor::from_slice([1.0f32, 2.0, 3.0]) + &Tensor::from_slice([4.0f32, 5.0, 6.0]);
    let root = UOp::sink(vec![sum.uop().contiguous()]);
    let normalization = normalize_for_schedule_cache(&root).expect("normalize schedule cache");
    let rangeified = svod_schedule::rangeify_with_map(normalization.normalized.clone(), None).expect("rangeify");
    let (kernel_graph_cached, _) = svod_schedule::try_get_kernel_graph(rangeified.sink).expect("kernel graph");
    let pre_schedule_cached = crate::schedule::create_pre_schedule(kernel_graph_cached).expect("pre schedule");

    let cached_has_params_before = pre_schedule_cached
        .items
        .iter()
        .any(|item| item.sources.iter().any(|src| matches!(src.op(), Op::Param { device: Some(_), .. })));
    assert!(cached_has_params_before, "cached pre-schedule should keep normalized PARAM placeholders");

    let restored = restore_post_schedule_pre_schedule(&pre_schedule_cached, &normalization);

    let restored_sources_have_params = restored
        .items
        .iter()
        .any(|item| item.sources.iter().any(|src| matches!(src.op(), Op::Param { device: Some(_), .. })));
    assert!(
        !restored_sources_have_params,
        "restored pre-schedule should rewrite callable source PARAM placeholders"
    );

    let restored_outputs_have_params =
        restored.output_buffer_uops.iter().any(|u| matches!(u.op(), Op::Param { device: Some(_), .. }));
    assert!(!restored_outputs_have_params, "restored pre-schedule should rewrite output buffer PARAM placeholders");

    let restored_sources_have_lunique =
        restored.items.iter().any(|item| item.sources.iter().any(|src| matches!(src.op(), Op::LUnique(_))));
    assert!(!restored_sources_have_lunique, "restored pre-schedule should rewrite LUNIQUE placeholders");

    // Same check as before the restore: the cached value must be left untouched.
    let cached_has_params_after = pre_schedule_cached
        .items
        .iter()
        .any(|item| item.sources.iter().any(|src| matches!(src.op(), Op::Param { device: Some(_), .. })));
    assert!(cached_has_params_after, "restoring should not mutate cached pre-schedule");
}
/// Stub renderer for the compile-pipeline tests in this file.
struct TestRenderer;

impl svod_device::device::Renderer for TestRenderer {
    /// Builds a CPU `ProgramSpec` named after `name` (or `"kernel"` when `name` is `None`) with a
    /// fixed placeholder source, then sets its buffer metadata and a `buf_count` of 1.
    fn render(
        &self,
        ast: &std::sync::Arc<UOp>,
        name: Option<&str>,
    ) -> svod_device::Result<svod_device::device::ProgramSpec> {
        let kernel_name = String::from(name.unwrap_or("kernel"));
        let source = String::from("// test source");
        let mut program = svod_device::device::ProgramSpec::new(
            kernel_name,
            source,
            svod_dtype::DeviceSpec::Cpu,
            std::sync::Arc::clone(ast),
        );
        program.set_buffer_metadata(vec![0], vec![0], vec![]);
        program.buf_count = 1;
        Ok(program)
    }

    /// Always reports the CPU device.
    fn device(&self) -> &svod_dtype::DeviceSpec {
        static CPU_SPEC: svod_dtype::DeviceSpec = svod_dtype::DeviceSpec::Cpu;
        &CPU_SPEC
    }
}
/// Stub compiler for the compile-pipeline tests in this file.
struct TestCompiler;

impl svod_device::device::Compiler for TestCompiler {
    /// Returns a `CompiledSpec` that reuses the spec's name and AST and carries the fixed bytes
    /// `[1, 2, 3]`.
    fn compile(
        &self,
        spec: &svod_device::device::ProgramSpec,
    ) -> svod_device::Result<svod_device::device::CompiledSpec> {
        let name = spec.name.clone();
        let bytes = vec![1, 2, 3];
        let compiled = svod_device::device::CompiledSpec::from_bytes(name, bytes, spec.ast.clone());
        Ok(compiled)
    }

    /// Fixed cache key for this stub.
    fn cache_key(&self) -> &'static str {
        "test"
    }
}
/// A program built via `program_from_sink` compiles through the stub renderer/compiler, and both
/// the spec and the compiled artifact carry the requested name `p_test`.
#[test]
fn test_compile_with_program_pipeline_components_accepts_program_input() {
    let root = UOp::sink(vec![UOp::native_const(1.0f32)]);
    let program = svod_codegen::program_pipeline::program_from_sink(root, svod_dtype::DeviceSpec::Cpu);

    let outcome = compile_with_program_pipeline_components(program, &TestRenderer, &TestCompiler, Some("p_test"));
    let (spec, compiled) = outcome.expect("PROGRAM input should compile through staged pipeline");

    assert_eq!(spec.name, "p_test");
    assert_eq!(compiled.name, "p_test");
    assert_eq!(compiled.bytes, vec![1, 2, 3]);
}
/// Passing a bare sink instead of a program must fail with an error mentioning
/// `expects PROGRAM input`.
#[test]
fn test_compile_with_program_pipeline_components_rejects_non_program_input() {
    let root = UOp::sink(vec![UOp::native_const(1.0f32)]);
    let err = compile_with_program_pipeline_components(root, &TestRenderer, &TestCompiler, Some("p_test"))
        .expect_err("non-PROGRAM input must fail");
    let message = err.to_string();
    assert!(message.contains("expects PROGRAM input"), "unexpected error: {err:?}");
}
/// A program that already carries a linearized form (but no source) still compiles, and the
/// resulting spec takes the requested name `p_test`.
#[test]
fn test_compile_with_program_pipeline_components_accepts_stage1_program_input() {
    let root = UOp::sink(vec![UOp::native_const(1.0f32)]);
    let linearized = UOp::linear(svod_schedule::linearize_with_cfg(root.clone()).into());
    let device = UOp::device(svod_dtype::DeviceSpec::Cpu);
    let program = UOp::program(root, device, Some(linearized), None, None);

    let outcome = compile_with_program_pipeline_components(program, &TestRenderer, &TestCompiler, Some("p_test"));
    let (spec, compiled) = outcome.expect("stage-1 PROGRAM input should compile");

    assert_eq!(spec.name, "p_test");
    assert_eq!(compiled.bytes, vec![1, 2, 3]);
}
/// A program that carries both a linearized form and a pre-rendered source compiles, and the
/// resulting spec is expected to be named `kernel` rather than the requested `p_test`.
#[test]
fn test_compile_with_program_pipeline_components_accepts_stage2_program_input() {
    let root = UOp::sink(vec![UOp::native_const(1.0f32)]);
    let linearized = UOp::linear(svod_schedule::linearize_with_cfg(root.clone()).into());
    let device = UOp::device(svod_dtype::DeviceSpec::Cpu);
    let rendered = UOp::source("// pre-rendered source".to_string());
    let program = UOp::program(root, device, Some(linearized), Some(rendered), None);

    let outcome = compile_with_program_pipeline_components(program, &TestRenderer, &TestCompiler, Some("p_test"));
    let (spec, compiled) = outcome.expect("stage-2 PROGRAM input should compile");

    assert_eq!(spec.name, "kernel");
    assert_eq!(compiled.bytes, vec![1, 2, 3]);
}
/// A program that has a source but no linearized form must be rejected with an error mentioning
/// `malformed PROGRAM state`.
#[test]
fn test_compile_with_program_pipeline_components_rejects_malformed_program_state() {
    let root = UOp::sink(vec![UOp::native_const(1.0f32)]);
    let device = UOp::device(svod_dtype::DeviceSpec::Cpu);
    let rendered = UOp::source("// malformed source".to_string());
    let program = UOp::program(root, device, None, Some(rendered), None);

    let err = compile_with_program_pipeline_components(program, &TestRenderer, &TestCompiler, Some("p_test"))
        .expect_err("malformed PROGRAM input must fail");
    let message = err.to_string();
    assert!(message.contains("malformed PROGRAM state"), "unexpected error: {err:?}");
}
/// Of the three fixed variables on the item, only `outer_i` is also listed in `loop_var_names`;
/// the collected set is expected to contain `outer_i` (value 2) and neither of the other two.
#[test]
fn test_collect_non_overridable_fixedvars_locks_only_loop_var_names() {
    use std::collections::{HashMap, HashSet};

    let sink = UOp::sink(vec![UOp::native_const(1.0f32)]);
    let kernel = sink.call(SmallVec::new(), svod_ir::CallInfo::default());
    let fixedvars =
        HashMap::from([("outer_i".to_string(), 2_i64), ("loop_j".to_string(), 1_i64), ("user_n".to_string(), 7_i64)]);
    let loop_var_names = HashSet::from(["outer_i".to_string()]);
    let item = crate::schedule::ScheduleItem {
        kernel,
        ast: sink,
        buffers: vec![],
        buffer_uop_ids: vec![],
        fixedvars,
        dependencies: vec![],
        instance_dependencies: vec![],
        alias_registered_ids: vec![],
        loop_var_names,
    };

    let non_overridable = collect_non_overridable_fixedvars(&item);
    assert_eq!(non_overridable.get("outer_i"), Some(&2));
    assert!(!non_overridable.contains_key("loop_j"));
    assert!(!non_overridable.contains_key("user_n"));
}
/// Realizes `[1, 2, 3] + [4, 5, 6]` and checks the values read back through ndarray.
#[test]
fn test_realize_simple_add() {
    crate::test::helpers::test_setup();

    let lhs = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let rhs = Tensor::from_slice([4.0f32, 5.0, 6.0]);
    let mut total = &lhs + &rhs;
    total.realize().unwrap();

    let array: ndarray::ArrayD<f32> = total.as_ndarray().unwrap();
    let (values, _offset) = array.into_raw_vec_and_offset();
    assert_eq!(values, vec![5.0, 7.0, 9.0]);
}
/// Builds a full reduction (`sum(())`) over a four-element tensor and checks that both creating
/// and realizing it succeed.
///
/// Failures are reported through `expect`, so the underlying error shows up in the panic message
/// itself instead of in a separate log line that may not be captured.
#[test]
fn test_realize_sum() {
    crate::test::helpers::test_setup();

    let a = Tensor::from_slice([1.0f32, 2.0, 3.0, 4.0]);
    let mut sum_tensor = a.sum(()).expect("Sum creation failed");
    sum_tensor.realize().expect("Realize should succeed");
}
/// A tensor created with `from_slice` reports the CPU device.
#[test]
fn test_tensor_device_default_cpu() {
    let tensor = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let reported = tensor.device();
    assert_eq!(reported, svod_ir::DeviceSpec::Cpu);
}
/// Moving a CPU tensor to the CPU yields a tensor that reports the same device as the original.
#[test]
fn test_tensor_to_same_device_is_noop() {
    let original = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let moved = original.to(svod_ir::DeviceSpec::Cpu);
    assert_eq!(original.device(), moved.device());
}
/// After `to(Cuda { device_id: 0 })` the returned tensor reports the CUDA device while the
/// original tensor still reports the CPU.
#[test]
fn test_tensor_to_different_device_creates_copy() {
    let original = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let moved = original.to(svod_ir::DeviceSpec::Cuda { device_id: 0 });
    assert_eq!(moved.device(), svod_ir::DeviceSpec::Cuda { device_id: 0 });
    assert_eq!(original.device(), svod_ir::DeviceSpec::Cpu);
}
/// Prepares an execution plan for a simple element-wise add and checks that the plan exposes at
/// least one kernel and a non-empty buffer list.
///
/// `expect` is used (as in the other prepare tests in this file) so a failure reports the
/// underlying error directly.
#[test]
fn test_prepare_simple_add() {
    crate::test::helpers::test_setup();

    let a = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let b = Tensor::from_slice([4.0f32, 5.0, 6.0]);
    let mut c = &a + &b;
    let plan = c.prepare().expect("prepare() should succeed");

    assert!(plan.kernels().next().is_some(), "Plan should have at least one kernel");
    assert!(!plan.buffers().is_empty(), "Plan should have buffers");
}
/// Every `PreparedOp::CompiledProgram` in the plan for a simple add must have
/// `kernel.host_parallel_safe` set, and there must be at least one such op.
#[test]
fn test_prepare_execution_plan_marks_cpu_kernels_host_parallel_safe() {
    crate::test::helpers::test_setup();

    let lhs = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let rhs = Tensor::from_slice([4.0f32, 5.0, 6.0]);
    let mut total = &lhs + &rhs;
    let plan = total.prepare().expect("prepare should succeed");

    // Keep only the compiled-program ops; other prepared op kinds are ignored here.
    let compiled_programs: Vec<_> = plan
        .prepared_ops()
        .iter()
        .filter_map(|op| {
            if let svod_runtime::PreparedOp::CompiledProgram(program) = op { Some(program) } else { None }
        })
        .collect();

    assert!(!compiled_programs.is_empty(), "prepare should produce compiled kernels");
    let all_parallel_safe = compiled_programs.iter().all(|program| program.kernel.host_parallel_safe);
    assert!(all_parallel_safe, "CPU kernels should propagate host_parallel_safe metadata");
}
/// Prepares a plan for `[1, 2, 3] + [4, 5, 6]`, executes it once, and checks the bytes copied
/// out of the plan's output buffer.
#[test]
fn test_prepare_and_execute() {
    crate::test::helpers::test_setup();

    let a = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let b = Tensor::from_slice([4.0f32, 5.0, 6.0]);
    let mut c = &a + &b;
    let plan = c.prepare().expect("prepare should succeed");
    plan.execute().expect("execute() should succeed");

    let output = plan.output_buffer().expect("plan has output");
    let mut data = vec![0.0f32; 3];
    // Derive the byte length from the vector instead of hard-coding it, so the view cannot
    // drift out of sync with the element count or element type.
    let byte_len = std::mem::size_of_val(data.as_slice());
    // SAFETY: the pointer comes from the live, initialized `data` allocation and `byte_len` is
    // exactly its size in bytes; `u8` has alignment 1 and every bit pattern is a valid `f32`.
    // `data` is not accessed through any other path until `bytes` has been consumed by `copyout`.
    let bytes = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast::<u8>(), byte_len) };
    output.copyout(bytes).expect("copyout should succeed");

    assert_eq!(data, vec![5.0, 7.0, 9.0]);
}
/// Executes the same prepared plan twice and checks that the output buffer still holds the
/// expected sum afterwards.
#[test]
fn test_prepare_and_execute_twice() {
    crate::test::helpers::test_setup();

    let a = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let b = Tensor::from_slice([4.0f32, 5.0, 6.0]);
    let mut c = &a + &b;
    let plan = c.prepare().expect("prepare should succeed");
    for _ in 0..2 {
        plan.execute().expect("execute() should succeed");
    }

    let output = plan.output_buffer().expect("plan has output");
    let mut data = vec![0.0f32; 3];
    // Derive the byte length from the vector instead of hard-coding it, so the view cannot
    // drift out of sync with the element count or element type.
    let byte_len = std::mem::size_of_val(data.as_slice());
    // SAFETY: the pointer comes from the live, initialized `data` allocation and `byte_len` is
    // exactly its size in bytes; `u8` has alignment 1 and every bit pattern is a valid `f32`.
    // `data` is not accessed through any other path until `bytes` has been consumed by `copyout`.
    let bytes = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast::<u8>(), byte_len) };
    output.copyout(bytes).expect("copyout should succeed");

    assert_eq!(data, vec![5.0, 7.0, 9.0]);
}
/// Feeds a hand-built schedule whose only item is an `EncDec` custom function into
/// `prepare_execution_plan`.
///
/// Expectations: the plan contains a `PreparedOp::CustomFunction` that keeps the single attribute
/// UOp (matched by id), and executing the plan fails with an error mentioning
/// `Unsupported runtime feature EncDec`.
#[test]
fn test_prepare_execution_plan_lowers_explicit_custom_function_op() {
    use std::collections::{HashMap, HashSet};

    crate::test::helpers::test_setup();

    let alloc = svod_device::registry::cpu().expect("cpu allocator");
    let input_buf = Buffer::new(alloc.clone(), svod_dtype::DType::Float32, vec![4], Default::default());
    let output_buf = Buffer::new(alloc, svod_dtype::DType::Float32, vec![4], Default::default());

    let attr = UOp::index_const(42);
    let ast = UOp::custom_function(svod_ir::CustomFunctionKind::EncDec, smallvec::smallvec![attr.clone()]);
    let kernel = ast.call(smallvec::smallvec![], svod_ir::CallInfo::default());

    let only_item = crate::schedule::ScheduleItem {
        kernel,
        ast,
        buffers: vec![output_buf.clone(), input_buf.clone()],
        buffer_uop_ids: vec![1001, 1002],
        fixedvars: HashMap::new(),
        dependencies: vec![],
        instance_dependencies: vec![],
        alias_registered_ids: vec![],
        loop_var_names: HashSet::new(),
    };
    let schedule_result = crate::schedule::ScheduleResult { items: vec![only_item], output_uop_ids: vec![1001] };

    let plan = prepare_execution_plan(&schedule_result, &PrepareConfig::from_env()).expect("prepare should succeed");

    let lowered = plan
        .prepared_ops()
        .iter()
        .find_map(|op| if let svod_runtime::PreparedOp::CustomFunction(found) = op { Some(found) } else { None })
        .expect("explicit custom function body should lower to PreparedOp::CustomFunction");
    assert_eq!(lowered.attrs.len(), 1, "custom-function attrs should be preserved into runtime plan");
    assert_eq!(lowered.attrs[0].id, attr.id);

    let err = plan.execute().expect_err("EncDec runtime should be explicit unsupported");
    let msg = err.to_string();
    assert!(msg.contains("Unsupported runtime feature EncDec"), "unexpected error: {msg}");
}
/// Realizes a simple add and checks that the result is still readable through ndarray afterwards.
#[test]
fn test_realize_buffer_cleanup() {
    crate::test::helpers::test_setup();

    let lhs = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let rhs = Tensor::from_slice([4.0f32, 5.0, 6.0]);
    let mut total = &lhs + &rhs;
    total.realize().expect("realize should succeed");

    let array: ndarray::ArrayD<f32> = total.as_ndarray().expect("as_ndarray should succeed");
    let (values, _offset) = array.into_raw_vec_and_offset();
    assert_eq!(values, vec![5.0, 7.0, 9.0]);
}
/// Executes a prepared plan three times, verifies the output, then releases the plan's
/// intermediate buffers and checks that the registry's buffer count did not grow.
///
/// Ignored by default: the buffer count comes from a global registry, so the comparison is
/// unreliable while other tests run in parallel.
#[test]
#[ignore = "Flaky under parallel global registry activity; run manually with --ignored --test-threads=1"]
fn test_prepare_execute_cleanup() {
    crate::test::helpers::test_setup();

    let a = Tensor::from_slice([1.0f32, 2.0, 3.0]);
    let b = Tensor::from_slice([4.0f32, 5.0, 6.0]);
    let mut c = &a + &b;
    let plan = c.prepare().expect("prepare should succeed");
    for _ in 0..3 {
        plan.execute().expect("execute should succeed");
    }

    let output = plan.output_buffer().expect("plan has output");
    let mut data = vec![0.0f32; 3];
    // Derive the byte length from the vector instead of hard-coding it, so the view cannot
    // drift out of sync with the element count or element type.
    let byte_len = std::mem::size_of_val(data.as_slice());
    // SAFETY: the pointer comes from the live, initialized `data` allocation and `byte_len` is
    // exactly its size in bytes; `u8` has alignment 1 and every bit pattern is a valid `f32`.
    // `data` is not accessed through any other path until `bytes` has been consumed by `copyout`.
    let bytes = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast::<u8>(), byte_len) };
    output.copyout(bytes).expect("copyout should succeed");
    assert_eq!(data, vec![5.0, 7.0, 9.0]);

    let count_before_cleanup = crate::tensor_registry::buffer_count();
    plan.release_intermediate_buffers(crate::tensor_registry::remove_buffer);
    let count_after_cleanup = crate::tensor_registry::buffer_count();
    assert!(
        count_after_cleanup <= count_before_cleanup,
        "Cleanup should not increase buffer count: before={}, after={}",
        count_before_cleanup,
        count_after_cleanup
    );
}
/// Executes one prepared plan `ITERATIONS` times, sampling the registry's buffer count after
/// each run, then releases intermediate buffers.
///
/// Expectations: the count after the last run equals the count after the first run (no growth),
/// and the count after cleanup does not exceed the count after the first run.
///
/// Ignored by default: the buffer count comes from a global registry, so the comparison is
/// unreliable while other tests run in parallel.
#[test]
#[ignore = "Flaky under parallel global registry activity; run manually with --ignored --test-threads=1"]
fn test_memory_growth_detection() {
    crate::test::helpers::test_setup();
    const ITERATIONS: usize = 10;

    let lhs = Tensor::from_slice([1.0f32, 2.0, 3.0, 4.0]);
    let rhs = Tensor::from_slice([5.0f32, 6.0, 7.0, 8.0]);
    let mut total = &lhs + &rhs;
    let plan = total.prepare().expect("prepare should succeed");

    // One sample per execute() call, taken immediately after the call returns.
    let counts: Vec<usize> = (0..ITERATIONS)
        .map(|_| {
            plan.execute().expect("execute should succeed");
            crate::tensor_registry::buffer_count()
        })
        .collect();

    plan.release_intermediate_buffers(crate::tensor_registry::remove_buffer);
    let count_after_cleanup = crate::tensor_registry::buffer_count();

    let count_after_first_execute = counts[0];
    let count_after_last_execute = *counts.last().unwrap();
    let growth_during_iterations = count_after_last_execute.saturating_sub(count_after_first_execute);

    eprintln!("Counts during execute: {:?}", counts);
    eprintln!("Growth during iterations (after first): {}", growth_during_iterations);
    eprintln!("Count after cleanup: {}", count_after_cleanup);

    assert_eq!(
        growth_during_iterations, 0,
        "Registry should not grow during repeated execute() calls (after initial setup)"
    );
    assert!(
        count_after_cleanup <= count_after_first_execute,
        "Cleanup should not increase buffer count: first_execute={}, after_cleanup={}",
        count_after_first_execute,
        count_after_cleanup
    );
}
/// Realizes `[1, 2, 3, 4] + [5, 6, 7, 8]` and checks the resulting slice.
#[test]
fn test_memory_growth_realize_pattern() {
    crate::test::helpers::test_setup();

    let lhs = Tensor::from_slice([1.0f32, 2.0, 3.0, 4.0]);
    let rhs = Tensor::from_slice([5.0f32, 6.0, 7.0, 8.0]);
    let mut total = &lhs + &rhs;
    total.realize().expect("realize should succeed");

    let array: ndarray::ArrayD<f32> = total.as_ndarray().expect("as_ndarray should succeed");
    let values = array.as_slice().unwrap();
    assert_eq!(values, &[6.0, 8.0, 10.0, 12.0]);
}