1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// RLX — versatile ML compiler + runtime.
// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 3.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use anyhow::Result;
use rlx_ir::HirGraphExt;
use rlx_ir::hir::HirMut;
use rlx_ir::{DType, Shape};
use super::BlockStage;
use crate::context::FlowCtx;
use crate::value::FlowValue;
/// Pipeline stage that picks one position along axis 1 of its input.
///
/// The stage operates in one of two modes, distinguished by `input_name`:
/// a name beginning with `__static_last_` encodes a fixed sequence length,
/// while any other name refers to a runtime index input.
#[derive(Debug, Clone)]
pub struct GatherLastTokenStage {
    /// Batch size; used as the length of the runtime index input.
    pub batch: usize,
    /// Name of the index input, or `__static_last_<seq>` for the static mode.
    pub input_name: String,
}

impl GatherLastTokenStage {
    /// Builds a stage that reads positions from the `last_token_idx` input.
    pub fn dynamic(batch: usize) -> Self {
        let input_name = String::from("last_token_idx");
        Self { batch, input_name }
    }

    /// Builds a stage for a sequence length known up front.
    ///
    /// `seq` is encoded into `input_name` as `__static_last_<seq>`.
    pub fn static_last(batch: usize, seq: usize) -> Self {
        let input_name = format!("__static_last_{seq}");
        Self { batch, input_name }
    }
}
impl BlockStage for GatherLastTokenStage {
    /// Emits HIR that selects a single position along axis 1 of `input`.
    ///
    /// The mode is chosen by `input_name`:
    /// * `__static_last_<seq>` — `<seq>` is parsed as a `usize` and a
    ///   length-1 `narrow_` at offset `seq - 1` on axis 1 is emitted.
    /// * anything else — an I32 input of shape `[batch]` with that name is
    ///   declared, reshaped to `[batch, 1]`, and used as the index operand
    ///   of a `gather_` on axis 1.
    ///
    /// For inputs of rank 3 or more the result is wrapped with shape
    /// `[dim0, 1, dim2]` (same dtype as the input); lower-rank inputs keep
    /// their shape metadata unchanged.
    ///
    /// # Errors
    ///
    /// Returns an error if the static suffix is not a valid `usize`, or if
    /// it is `0` (there is no last position in an empty sequence).
    fn emit(&self, ctx: &mut FlowCtx<'_>, input: FlowValue) -> Result<Option<FlowValue>> {
        let id = if let Some(suffix) = self.input_name.strip_prefix("__static_last_") {
            let seq = suffix
                .parse::<usize>()
                .map_err(|_| anyhow::anyhow!("invalid static last token stage"))?;
            // `seq - 1` would underflow for seq == 0 (panic in debug builds,
            // wrap to usize::MAX in release); reject it explicitly instead.
            let last = seq.checked_sub(1).ok_or_else(|| {
                anyhow::anyhow!("invalid static last token stage: sequence length must be >= 1")
            })?;
            let mut gb = HirMut::new(ctx.hir());
            gb.narrow_(input.id, 1, last, 1)
        } else {
            // Token-position indices — must be integer; MLX `take` and
            // the Metal gather kernel both treat the index tensor as I32.
            // Declaring this F32 produced garbage logits (stable wrong
            // token streams like "< as as as…").
            let idx = ctx.input(&self.input_name, Shape::new(&[self.batch], DType::I32));
            let mut gb = HirMut::new(ctx.hir());
            let idx_2d = gb.reshape_(idx, vec![self.batch as i64, 1]);
            gb.gather_(input.id, idx_2d, 1)
        };

        // The output shape reads dims 0 and 2, so the input needs at least
        // three dimensions; a `>= 2` guard would index past a rank-2 shape.
        let out_shape = if input.shape.rank() >= 3 {
            let batch = input.shape.dim(0).unwrap_static();
            let hidden = input.shape.dim(2).unwrap_static();
            Shape::new(&[batch, 1, hidden], input.shape.dtype())
        } else {
            input.shape.clone()
        };

        Ok(Some(ctx.wrap(id, out_shape)))
    }
}