kaio_macros/lib.rs
1//! # kaio-macros
2//!
3//! Proc macro crate for KAIO. Provides the `#[gpu_kernel]` attribute
4//! macro that transforms Rust function syntax into PTX codegen +
5//! typed launch wrappers.
6//!
7//! This crate is not intended to be used directly — use `kaio` and
8//! import via `kaio::prelude::*`.
9
10#![warn(missing_docs)]
11
12mod codegen;
13mod kernel_ir;
14mod lower;
15mod parse;
16
17use proc_macro::TokenStream;
18use proc_macro2::TokenStream as TokenStream2;
19use syn::ItemFn;
20
21use parse::attrs::parse_kernel_config;
22use parse::body::parse_body;
23use parse::signature::parse_kernel_signature;
24
25/// Marks a function as a GPU kernel compiled to PTX.
26///
27/// # Parameter syntax
28///
29/// Kernel parameters are written as `*const [T]` (primary) or `&[T]`
30/// (sugar) for read-only slices, and `*mut [T]` (primary) or `&mut [T]`
31/// (sugar) for read-write slices. Both forms lower to identical PTX.
32/// The pointer form is recommended because it accurately signals
33/// "device pointer, no aliasing contract" — see RFC-0001. The
34/// reference form is accepted as permanent ergonomic sugar; it will
35/// not be deprecated.
36///
37/// Scalar types (`f32`, `f64`, `i32`, `u32`, `i64`, `u64`, `bool`) are
38/// passed by value.
39///
40/// # DSL, not compiled Rust
41///
42/// The function body uses Rust syntax but is **not compiled by rustc**.
43/// The proc macro parses it into KAIO's own IR (`KernelStmt`) and emits
44/// PTX text directly. No LLVM, no MIR, no borrow checker runs on the
45/// kernel body. ptxas sees a plain `.u64` param for every slice
46/// parameter regardless of which surface syntax you wrote.
47///
48/// Thousands of threads execute the kernel body concurrently, all
49/// accessing the same device buffers. Correctness depends on writing
50/// disjoint access patterns (e.g. `if idx < n` bounds guards), not on
51/// compiler-enforced uniqueness.
52///
53/// You cannot call Rust functions declared outside the kernel inside
54/// the kernel body. The supported syntax subset includes: arithmetic,
55/// comparisons, bitwise ops, short-circuit `&&`/`||`, compound
56/// assignment, `if`/`else`, `for`/`while` loops, `let` bindings, and
57/// KAIO GPU builtins (`thread_idx_x()`, `shared_mem!`, etc.).
58///
59/// # Attributes
60///
61/// - `block_size = N` (required): Number of threads per block. Must be
62/// a power of 2 in the range `[1, 1024]`.
63///
64/// # Example
65///
66/// ```ignore
67/// use kaio::prelude::*;
68///
69/// #[gpu_kernel(block_size = 256)]
70/// fn vector_add(a: *const [f32], b: *const [f32], out: *mut [f32], n: u32) {
71/// let idx = thread_idx_x() + block_idx_x() * block_dim_x();
72/// if idx < n {
73/// out[idx] = a[idx] + b[idx];
74/// }
75/// }
76/// ```
77#[proc_macro_attribute]
78pub fn gpu_kernel(attr: TokenStream, item: TokenStream) -> TokenStream {
79 match gpu_kernel_impl(attr.into(), item.into()) {
80 Ok(tokens) => tokens.into(),
81 Err(err) => err.to_compile_error().into(),
82 }
83}
84
85fn gpu_kernel_impl(attr: TokenStream2, item: TokenStream2) -> syn::Result<TokenStream2> {
86 // Parse the function
87 let func: ItemFn = syn::parse2(item)?;
88
89 // Parse attribute config
90 let config = parse_kernel_config(attr)?;
91
92 // Parse and validate signature
93 let sig = parse_kernel_signature(&func, config)?;
94
95 // Parse body into kernel IR
96 let body = parse_body(&func.block)?;
97
98 // Generate the kernel module (build_ptx + launch)
99 codegen::generate_kernel_module(&sig, &body)
100}