rlx-flow 0.2.4

Block assembly-line API for RLX model builders — fusion-first, config-driven
Documentation
// RLX — versatile ML compiler + runtime.
// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 3.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use anyhow::Result;
use rlx_ir::HirGraphExt;
use rlx_ir::hir::HirMut;

use super::BlockStage;
use crate::context::FlowCtx;
use crate::value::FlowValue;

/// Stage that gathers rows of an embedding table, indexed by a named graph input.
///
/// The stage ignores the value flowing in from the previous stage; its indices
/// come from the graph input registered under `input_name`.
#[derive(Debug, Clone)]
pub struct GatherFromInputStage {
    /// Name under which the index tensor is registered as a graph input.
    pub input_name: String,
    /// Parameter key used to load the embedding table.
    pub weight_key: String,
    /// Axis argument forwarded to the gather op.
    pub axis: usize,
}

impl GatherFromInputStage {
    /// Builds a stage from anything convertible into owned strings plus the gather axis.
    pub fn new(input_name: impl Into<String>, weight_key: impl Into<String>, axis: usize) -> Self {
        let input_name = input_name.into();
        let weight_key = weight_key.into();
        Self {
            input_name,
            weight_key,
            axis,
        }
    }
}

impl BlockStage for GatherFromInputStage {
    /// Emits a gather node that reads the embedding table with the named input as indices.
    ///
    /// The incoming `_input` value is not used. The returned value wraps the
    /// gather node together with a shape made of the index dims followed by
    /// the table's dim 1.
    ///
    /// # Errors
    ///
    /// Fails when `input_name` is not present in `ctx.state.inputs`, or when
    /// loading the `weight_key` parameter fails.
    fn emit(&self, ctx: &mut FlowCtx<'_>, _input: FlowValue) -> Result<Option<FlowValue>> {
        // Resolve the index tensor first so a missing input is reported
        // before any parameter is loaded.
        let Some(entry) = ctx.state.inputs.get(&self.input_name) else {
            return Err(anyhow::anyhow!(
                "GatherFromInput missing input `{}`",
                self.input_name
            ));
        };
        let (index_node, index_shape) = entry.clone();

        let table = ctx.load_param(&self.weight_key, false)?;
        let table_shape = ctx.hir().node(table).shape.clone();

        // Result dims: every index dim, then the table's dim 1.
        // NOTE(review): the trailing dim is always table dim 1 regardless of
        // `self.axis` — confirm this matches `gather_` for non-zero axes.
        let result_dims: Vec<rlx_ir::Dim> = index_shape
            .dims()
            .iter()
            .cloned()
            .chain(std::iter::once(table_shape.dim(1)))
            .collect();
        // The result dtype must come from the TABLE, never from the indices:
        // mixing the two produced corrupt embeddings on backends that honor
        // declared dtypes.
        let result_shape = rlx_ir::Shape::from_dims(&result_dims, table_shape.dtype());

        let mut builder = HirMut::new(ctx.hir());
        let gathered = builder.gather_(table, index_node, self.axis);
        Ok(Some(ctx.wrap(gathered, result_shape)))
    }
}

/// Stage that gathers an embedding from a named graph input and adds it to
/// the active hidden tensor.
#[derive(Debug, Clone)]
pub struct GatherAddStage {
    /// Name under which the index tensor is registered as a graph input.
    pub input_name: String,
    /// Parameter key used to load the embedding table.
    pub weight_key: String,
    /// Axis argument forwarded to the gather op.
    pub axis: usize,
}

impl GatherAddStage {
    /// Builds a stage from anything convertible into owned strings plus the gather axis.
    pub fn new(input_name: impl Into<String>, weight_key: impl Into<String>, axis: usize) -> Self {
        let input_name = input_name.into();
        let weight_key = weight_key.into();
        Self {
            input_name,
            weight_key,
            axis,
        }
    }
}

impl BlockStage for GatherAddStage {
    /// Emits a gather over the embedding table followed by an add with `input`.
    ///
    /// The returned value wraps the add node together with a shape made of
    /// the index dims followed by the table's dim 1, using the table's dtype.
    ///
    /// # Errors
    ///
    /// Fails when `input_name` is not present in `ctx.state.inputs`, or when
    /// loading the `weight_key` parameter fails.
    fn emit(&self, ctx: &mut FlowCtx<'_>, input: FlowValue) -> Result<Option<FlowValue>> {
        // Resolve the index tensor first so a missing input is reported
        // before any parameter is loaded.
        let Some(entry) = ctx.state.inputs.get(&self.input_name) else {
            return Err(anyhow::anyhow!(
                "GatherAdd missing input `{}`",
                self.input_name
            ));
        };
        let (index_node, index_shape) = entry.clone();

        let table = ctx.load_param(&self.weight_key, false)?;
        let table_shape = ctx.hir().node(table).shape.clone();

        // Result dims: every index dim, then the table's dim 1; the dtype is
        // taken from the table rather than from the indices.
        // NOTE(review): the wrapped shape describes the gathered tensor and is
        // not derived from `input` — presumably both share a shape; confirm.
        let result_dims: Vec<rlx_ir::Dim> = index_shape
            .dims()
            .iter()
            .cloned()
            .chain(std::iter::once(table_shape.dim(1)))
            .collect();
        let result_shape = rlx_ir::Shape::from_dims(&result_dims, table_shape.dtype());

        let mut builder = HirMut::new(ctx.hir());
        let embedding = builder.gather_(table, index_node, self.axis);
        let summed = builder.add(input.id, embedding);
        Ok(Some(ctx.wrap(summed, result_shape)))
    }
}