kaio_macros/lib.rs
1//! # kaio-macros
2//!
3//! Proc macro crate for KAIO. Provides the `#[gpu_kernel]` attribute
4//! macro that transforms Rust function syntax into PTX codegen +
5//! typed launch wrappers.
6//!
7//! This crate is not intended to be used directly — use `kaio` and
8//! import via `kaio::prelude::*`.
9
10#![warn(missing_docs)]
11
12mod codegen;
13mod kernel_ir;
14mod lower;
15mod parse;
16
17use proc_macro::TokenStream;
18use proc_macro2::TokenStream as TokenStream2;
19use syn::ItemFn;
20
21use parse::attrs::parse_kernel_config;
22use parse::body::parse_body;
23use parse::signature::parse_kernel_signature;
24
25/// Marks a function as a GPU kernel compiled to PTX.
26///
27/// # DSL, not compiled Rust
28///
29/// The function body uses Rust syntax but is **not compiled by rustc**.
30/// The proc macro parses it into KAIO's own IR (`KernelStmt`) and emits
31/// PTX text directly. No LLVM, no MIR, no borrow checker runs on the
32/// kernel body.
33///
34/// This has an important consequence for `&mut [T]` parameters: in
35/// standard Rust, `&mut T` carries a `noalias` guarantee — the compiler
36/// assumes exclusive access. In a GPU kernel, thousands of threads
37/// execute the same function body concurrently, all accessing the same
38/// buffer. Because the body never reaches rustc's backend, no `noalias`
39/// attribute is emitted — ptxas sees a plain `.u64` param. There is no
40/// UB from the aliasing mismatch, but the `&mut` syntax is misleading:
41/// correctness depends on the kernel author writing disjoint access
42/// patterns (e.g. `if idx < n` bounds guards), not on compiler-enforced
43/// uniqueness.
44///
45/// A future release will accept `*mut [T]` / `*const [T]` as the
46/// primary kernel parameter syntax to better communicate this. See
47/// RFC-0001 in the repository for the design direction.
48///
49/// You cannot call Rust functions declared outside the kernel inside the
50/// kernel body. The supported syntax subset includes: arithmetic,
51/// comparisons, bitwise ops, short-circuit `&&`/`||`, compound
52/// assignment, `if`/`else`, `for`/`while` loops, `let` bindings, and
53/// KAIO GPU builtins (`thread_idx_x()`, `shared_mem!`, etc.).
54///
55/// # Attributes
56///
57/// - `block_size = N` (required): Number of threads per block. Must be
58/// a power of 2 in the range `[1, 1024]`.
59///
60/// # Example
61///
62/// ```ignore
63/// use kaio::prelude::*;
64///
65/// #[gpu_kernel(block_size = 256)]
66/// fn vector_add(a: &[f32], b: &[f32], out: &mut [f32], n: u32) {
67/// let idx = thread_idx_x() + block_idx_x() * block_dim_x();
68/// if idx < n {
69/// out[idx] = a[idx] + b[idx];
70/// }
71/// }
72/// ```
73#[proc_macro_attribute]
74pub fn gpu_kernel(attr: TokenStream, item: TokenStream) -> TokenStream {
75 match gpu_kernel_impl(attr.into(), item.into()) {
76 Ok(tokens) => tokens.into(),
77 Err(err) => err.to_compile_error().into(),
78 }
79}
80
81fn gpu_kernel_impl(attr: TokenStream2, item: TokenStream2) -> syn::Result<TokenStream2> {
82 // Parse the function
83 let func: ItemFn = syn::parse2(item)?;
84
85 // Parse attribute config
86 let config = parse_kernel_config(attr)?;
87
88 // Parse and validate signature
89 let sig = parse_kernel_signature(&func, config)?;
90
91 // Parse body into kernel IR
92 let body = parse_body(&func.block)?;
93
94 // Generate the kernel module (build_ptx + launch)
95 codegen::generate_kernel_module(&sig, &body)
96}