1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// RLX — versatile ML compiler + runtime.
// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 3.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! Persistent buffer handles — state that survives across forward passes.
//!
//! For inference of stateful models (KV-cache, beam search) and for
//! training (gradient accumulators, optimizer state), the runtime needs
//! buffers that persist beyond a single `compiled.run()`. The arena
//! is rebuilt every compile, so it can't carry state.
//!
//! `BufferHandle` is an opaque, stable identifier the user creates once
//! and binds at compile time. The backend allocates a separate "handles"
//! region (independent of the arena) and routes reads/writes there.
//!
//! Workflow:
//!
//! ```rust,ignore
//! let kv_cache = BufferHandle::new("kv", &[batch, max_seq, num_heads, head_dim]);
//! let session = Session::new(Device::Metal);
//! let mut compiled = session.compile_with(graph, &CompileOptions::new()
//!     .bind_handle(&kv_cache));
//!
//! for token in tokens {
//!     compiled.bind_handle("kv", &kv_cache_data); // initial value
//!     let logits = compiled.run(&[("token", &[token])]);
//!     kv_cache_data = compiled.read_handle("kv").unwrap();
//! }
//! ```
use ;
/// External, persistent buffer reference. Created once, bound at compile,
/// carried across many `compiled.run()` invocations.