use crate::falcon2::config::Falcon2Config;
use crate::falcon2::model::{Falcon2AlibiPositionalBias, Falcon2Error, Falcon2Model};
/// Falcon-2 backbone paired with a language-model output projection.
///
/// `forward` runs the backbone and takes the dot product of every hidden row
/// with every row of `lm_head_weight` to produce per-position vocabulary logits.
pub struct Falcon2ForCausalLM {
/// Backbone model; its `forward` output and its `config` (`hidden_size`,
/// `vocab_size`) drive the projection.
pub model: Falcon2Model,
/// Output projection weights stored flat: the row for vocabulary entry `i`
/// occupies `[i * hidden_size, (i + 1) * hidden_size)`. `new` allocates
/// `vocab_size * hidden_size` zeros.
pub lm_head_weight: Vec<f32>,
}
impl Falcon2ForCausalLM {
    /// Builds a causal-LM wrapper around a freshly constructed [`Falcon2Model`].
    ///
    /// The output projection `lm_head_weight` is allocated as
    /// `vocab_size * hidden_size` zeros.
    pub fn new(config: Falcon2Config) -> Self {
        // Read the dimensions before `config` is moved into the backbone.
        let v = config.vocab_size;
        let h = config.hidden_size;
        let model = Falcon2Model::new(config);
        let lm_head_weight = vec![0.0_f32; v * h];
        Self {
            model,
            lm_head_weight,
        }
    }

    /// Computes vocabulary logits for every position of `input_ids`.
    ///
    /// Returns a flat vector of `input_ids.len() * vocab_size` values where the
    /// logit of vocabulary entry `j` at position `i` is stored at
    /// `i * vocab_size + j`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the backbone's `forward`, and returns
    /// [`Falcon2Error::ForwardError`] when `lm_head_weight` or the backbone
    /// output is shorter than the configured dimensions require (these cases
    /// previously panicked on a slice bound).
    pub fn forward(&self, input_ids: &[u32]) -> Result<Vec<f32>, Falcon2Error> {
        let hidden = self.model.forward(input_ids)?;
        let seq_len = input_ids.len();
        let (h, v) = self.checked_dims(hidden.len(), seq_len)?;

        let mut logits = Vec::with_capacity(seq_len * v);
        for pos in 0..seq_len {
            self.project_row(&hidden[pos * h..(pos + 1) * h], &mut logits);
        }
        Ok(logits)
    }

    /// Greedily generates `max_new_tokens` tokens following `input_ids`.
    ///
    /// Every step re-runs the backbone over the whole sequence so far, projects
    /// only the final position onto the vocabulary and appends the
    /// highest-scoring token. When several logits share the maximum value the
    /// highest index among them is chosen. Only the newly generated tokens are
    /// returned; the prompt is not included.
    ///
    /// # Errors
    ///
    /// * [`Falcon2Error::InvalidInput`] when `input_ids` is empty.
    /// * Any error returned by the backbone's `forward`.
    /// * [`Falcon2Error::ForwardError`] when the weights or hidden states are
    ///   undersized, when the vocabulary is empty, or when a logit is NaN.
    pub fn generate(
        &self,
        input_ids: &[u32],
        max_new_tokens: usize,
    ) -> Result<Vec<u32>, Falcon2Error> {
        if input_ids.is_empty() {
            return Err(Falcon2Error::InvalidInput(
                "input_ids must not be empty for generation".to_string(),
            ));
        }

        let prompt_len = input_ids.len();
        let mut ids: Vec<u32> = input_ids.to_vec();
        // Reused across steps so each iteration does not allocate a new logits buffer.
        let mut last_logits: Vec<f32> = Vec::new();

        for _ in 0..max_new_tokens {
            let hidden = self.model.forward(&ids)?;
            let seq_len = ids.len();
            let (h, _) = self.checked_dims(hidden.len(), seq_len)?;

            // Only the final position decides the next token, so the other
            // rows are never projected onto the vocabulary.
            last_logits.clear();
            self.project_row(&hidden[(seq_len - 1) * h..seq_len * h], &mut last_logits);

            ids.push(Self::greedy_token(&last_logits)?);
        }

        Ok(ids.split_off(prompt_len))
    }

    /// Wraps a user message in the `User: … / Falcon: ` chat template.
    pub fn format_chat_prompt(user: &str) -> String {
        format!("User: {user}\nFalcon: ")
    }

    /// Returns the ALiBi slopes for `n` heads by delegating to
    /// [`Falcon2AlibiPositionalBias::compute_slopes`].
    pub fn compute_alibi_slopes(n: usize) -> Vec<f32> {
        Falcon2AlibiPositionalBias::compute_slopes(n)
    }

    /// Validates buffer sizes and returns `(hidden_size, vocab_size)`.
    ///
    /// Buffers longer than required are accepted (only the leading part is
    /// read); shorter ones are reported as an error instead of panicking later.
    fn checked_dims(
        &self,
        hidden_len: usize,
        seq_len: usize,
    ) -> Result<(usize, usize), Falcon2Error> {
        let h = self.model.config.hidden_size;
        let v = self.model.config.vocab_size;

        if self.lm_head_weight.len() < v * h {
            return Err(Falcon2Error::ForwardError(format!(
                "lm_head_weight has {} elements, expected at least {} (vocab_size * hidden_size)",
                self.lm_head_weight.len(),
                v * h
            )));
        }
        if hidden_len < seq_len * h {
            return Err(Falcon2Error::ForwardError(format!(
                "hidden states have {} elements, expected at least {} (seq_len * hidden_size)",
                hidden_len,
                seq_len * h
            )));
        }
        Ok((h, v))
    }

    /// Appends the `vocab_size` logits of one hidden-state row to `out`.
    ///
    /// Callers must have validated the sizes with `checked_dims` first, which
    /// guarantees every weight-row slice below is in bounds.
    fn project_row(&self, hidden_row: &[f32], out: &mut Vec<f32>) {
        let h = self.model.config.hidden_size;
        let v = self.model.config.vocab_size;
        out.extend((0..v).map(|vocab_idx| {
            let weight_row = &self.lm_head_weight[vocab_idx * h..(vocab_idx + 1) * h];
            hidden_row
                .iter()
                .zip(weight_row.iter())
                .map(|(x, w)| x * w)
                .sum::<f32>()
        }));
    }

    /// Picks the index of the largest logit (the last one on ties).
    ///
    /// NaN logits are rejected up front: treating an incomparable pair as
    /// "equal" would let any value following a NaN overwrite the running
    /// maximum and silently yield an arbitrary token.
    fn greedy_token(logits: &[f32]) -> Result<u32, Falcon2Error> {
        if logits.iter().any(|logit| logit.is_nan()) {
            return Err(Falcon2Error::ForwardError(
                "logits contain NaN".to_string(),
            ));
        }

        let (best_idx, _) = logits
            .iter()
            .enumerate()
            // With NaN excluded above, `partial_cmp` is always `Some`.
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .ok_or_else(|| Falcon2Error::ForwardError("empty logits".to_string()))?;

        // Checked conversion instead of `as`, spelled with the full trait path
        // so it does not rely on `TryFrom` being in the edition's prelude.
        <u32 as std::convert::TryFrom<usize>>::try_from(best_idx).map_err(|_| {
            Falcon2Error::ForwardError(format!("token index {} does not fit in u32", best_idx))
        })
    }
}
#[cfg(test)]
/// Unit tests for the Falcon-2 configuration, building blocks, backbone and
/// the causal-LM wrapper defined in this file.
mod tests {
    use super::*;
    use crate::falcon2::{
        config::Falcon2Config,
        model::{
            Falcon2AlibiPositionalBias, Falcon2DecoderLayer, Falcon2Error, Falcon2LayerNorm,
            Falcon2MLP, Falcon2Model,
        },
    };

    /// Small configuration that keeps every test fast.
    fn tiny_config() -> Falcon2Config {
        Falcon2Config {
            hidden_size: 64,
            num_hidden_layers: 2,
            num_attention_heads: 4,
            num_kv_heads: 1,
            intermediate_size: 128,
            max_position_embeddings: 512,
            vocab_size: 128,
            layer_norm_epsilon: 1e-5,
            use_alibi: true,
            parallel_attn: true,
            bias: false,
            hidden_act: String::from("gelu"),
        }
    }

    /// The default configuration carries the expected full-size values.
    #[test]
    fn test_falcon2_config_default() {
        let defaults = Falcon2Config::default();

        assert_eq!(defaults.hidden_size, 4096);
        assert_eq!(defaults.num_hidden_layers, 60);
        assert_eq!(defaults.num_attention_heads, 64);
        assert_eq!(defaults.num_kv_heads, 1);
        assert_eq!(defaults.intermediate_size, 16384);
        assert_eq!(defaults.max_position_embeddings, 8192);
        assert_eq!(defaults.vocab_size, 65024);
        assert!(defaults.use_alibi);
        assert!(defaults.parallel_attn);
        assert!(!defaults.bias);
        assert_eq!(defaults.hidden_act, "gelu");
    }

    /// A hand-built configuration exposes its fields and derived head size.
    #[test]
    fn test_falcon2_config_custom() {
        let custom = tiny_config();

        assert_eq!(custom.hidden_size, 64);
        assert_eq!(custom.num_kv_heads, 1);
        assert_eq!(custom.head_dim(), 64 / 4);
    }

    /// The first two slopes for four heads match the expected values.
    #[test]
    fn test_falcon2_alibi_slopes_compute() {
        let slopes = Falcon2AlibiPositionalBias::compute_slopes(4);
        assert_eq!(slopes.len(), 4);

        let first = slopes[0];
        assert!(
            (first - 0.25).abs() < 1e-5,
            "slope[0] should be 0.25, got {}",
            first
        );

        let second = slopes[1];
        assert!(
            (second - 0.0625).abs() < 1e-5,
            "slope[1] should be 0.0625, got {}",
            second
        );
    }

    /// Consecutive slopes strictly decrease.
    #[test]
    fn test_falcon2_alibi_slopes_decreasing() {
        let slopes = Falcon2AlibiPositionalBias::compute_slopes(8);

        for pair in slopes.windows(2) {
            let (earlier, later) = (pair[0], pair[1]);
            assert!(
                earlier > later,
                "slopes must be strictly decreasing: {} !> {}",
                earlier,
                later
            );
        }
    }

    /// The bias buffer holds one `seq_len x seq_len` plane per head.
    #[test]
    fn test_falcon2_alibi_bias_shape() {
        let (num_heads, seq_len) = (4, 6);

        let slopes = Falcon2AlibiPositionalBias::compute_slopes(num_heads);
        let bias = Falcon2AlibiPositionalBias::compute_bias(seq_len, &slopes);

        assert_eq!(bias.len(), num_heads * seq_len * seq_len);
    }

    /// Diagonal entries are zero and every off-diagonal entry is negative.
    #[test]
    fn test_falcon2_alibi_causal_mask() {
        let (num_heads, seq_len) = (2, 4);

        let slopes = Falcon2AlibiPositionalBias::compute_slopes(num_heads);
        let bias = Falcon2AlibiPositionalBias::compute_bias(seq_len, &slopes);

        let plane = seq_len * seq_len;
        let at = |head: usize, row: usize, col: usize| bias[head * plane + row * seq_len + col];

        // First pass: the diagonal of every head.
        for head in 0..num_heads {
            for i in 0..seq_len {
                let val = at(head, i, i);
                assert!((val).abs() < 1e-7, "diagonal bias must be 0, got {val}");
            }
        }

        // Second pass: everything off the diagonal.
        for head in 0..num_heads {
            for i in 0..seq_len {
                for j in (0..seq_len).filter(|&j| j != i) {
                    let val = at(head, i, j);
                    assert!(val < 0.0, "off-diagonal bias must be negative, got {val}");
                }
            }
        }
    }

    /// With unit weight and zero bias the normalized output has mean ~0.
    #[test]
    fn test_falcon2_layer_norm() {
        let input = vec![1.0_f32, 2.0, 3.0, 4.0];
        let gamma = vec![1.0_f32; 4];
        let beta = vec![0.0_f32; 4];

        let out = Falcon2LayerNorm::forward(&input, &gamma, &beta, 1e-5);
        assert_eq!(out.len(), 4);

        let total: f32 = out.iter().sum();
        let mean = total / 4.0;
        assert!(
            mean.abs() < 1e-5,
            "LayerNorm output mean should be ~0, got {mean}"
        );
    }

    /// GELU matches reference values at 1.0 and for a large input.
    #[test]
    fn test_falcon2_gelu_activation() {
        let y = Falcon2MLP::gelu(1.0);
        assert!(
            (y - 0.841).abs() < 0.005,
            "gelu(1.0) should be ~0.841, got {y}"
        );

        let large = Falcon2MLP::gelu(10.0);
        assert!(
            (large - 10.0).abs() < 0.1,
            "gelu(10.0) should be ~10.0, got {large}"
        );
    }

    /// GELU maps zero to zero.
    #[test]
    fn test_falcon2_gelu_zero() {
        let y = Falcon2MLP::gelu(0.0);
        assert!(y.abs() < 1e-6, "gelu(0) must be 0, got {y}");
    }

    /// The tiny configuration uses one KV head shared by several query heads.
    #[test]
    fn test_falcon2_mqa_single_kv_head() {
        let cfg = tiny_config();

        assert_eq!(cfg.num_kv_heads, 1, "Falcon-2 uses single KV head (MQA)");
        assert!(cfg.num_attention_heads > cfg.num_kv_heads);
    }

    /// A decoder layer preserves the `[seq_len * hidden_size]` shape.
    #[test]
    fn test_falcon2_parallel_attn_mlp() {
        let cfg = tiny_config();
        let hidden = cfg.hidden_size;
        let seq_len = 4;

        let layer = Falcon2DecoderLayer::new(cfg);
        let activations = vec![0.5_f32; seq_len * hidden];

        let out = layer.forward(&activations, seq_len);
        assert_eq!(out.len(), seq_len * hidden);
    }

    /// The backbone returns one hidden vector per input token.
    #[test]
    fn test_falcon2_model_forward() {
        let cfg = tiny_config();
        let hidden = cfg.hidden_size;
        let model = Falcon2Model::new(cfg);

        let tokens = vec![1_u32, 2, 3, 4];
        let out = model.forward(&tokens).expect("forward should succeed");

        assert_eq!(out.len(), 4 * hidden);
    }

    /// The causal-LM head returns `vocab_size` logits per position.
    #[test]
    fn test_falcon2_causal_lm_forward() {
        let cfg = tiny_config();
        let vocab = cfg.vocab_size;
        let lm = Falcon2ForCausalLM::new(cfg);

        let tokens = vec![1_u32, 2, 3];
        let logits = lm.forward(&tokens).expect("forward should succeed");

        assert_eq!(
            logits.len(),
            3 * vocab,
            "logits shape must be [seq_len * vocab_size]"
        );
    }

    /// Generation yields exactly the requested number of in-vocabulary tokens.
    #[test]
    fn test_falcon2_generate() {
        let cfg = tiny_config();
        let vocab = cfg.vocab_size;
        let lm = Falcon2ForCausalLM::new(cfg);

        let prompt = vec![1_u32, 2];
        let generated = lm.generate(&prompt, 3).expect("generate should succeed");

        assert_eq!(generated.len(), 3);
        assert!(
            generated.iter().all(|&tok| (tok as usize) < vocab),
            "generated tokens must be within vocab"
        );
    }

    /// The chat template contains the user turn and the assistant cue.
    #[test]
    fn test_falcon2_chat_format() {
        let prompt = Falcon2ForCausalLM::format_chat_prompt("What is Rust?");

        assert!(prompt.starts_with("User: "));
        assert!(prompt.contains("What is Rust?"));
        assert!(prompt.contains("Falcon: "));
    }

    /// Error variants display their payload and implement `std::error::Error`.
    #[test]
    fn test_falcon2_error_types() {
        let invalid = Falcon2Error::InvalidInput("bad".to_string());
        let failed = Falcon2Error::ForwardError("nan".to_string());

        assert!(invalid.to_string().contains("bad"));
        assert!(failed.to_string().contains("nan"));

        let _boxed: Box<dyn std::error::Error> = Box::new(Falcon2Error::ForwardError("x".into()));
    }

    /// The backbone rejects an empty token sequence.
    #[test]
    fn test_falcon2_empty_input_error() {
        let model = Falcon2Model::new(tiny_config());

        let err = model.forward(&[]);
        assert!(err.is_err(), "empty input should return an error");
    }

    /// The backbone rejects a token id equal to the vocabulary size.
    #[test]
    fn test_falcon2_oov_token_error() {
        let model = Falcon2Model::new(tiny_config());

        let err = model.forward(&[128_u32]);
        assert!(err.is_err(), "out-of-vocab token should return an error");
    }
}