use crate::cognition::delta_modulation::compute_delta_modulation;
use crate::errors::{NoosError, NoosResult};
use crate::inference::cognitive_model::CognitiveModel;
use crate::inference::model::LocalModel;
use crate::inference::sampler::CognitiveSampler;
use crate::inference::tokenizer::NoosTokenizer;
use crate::types::intervention::{CognitiveState, DeltaModulation};
/// Result of one decoding step produced by `InferenceEngine::generate_next`.
#[derive(Debug, Clone)]
pub struct GenerationStep {
/// Id of the token the sampler selected for this step.
pub token_id: u32,
/// Text obtained by decoding `token_id` with the engine's tokenizer.
pub text: String,
/// `true` when `token_id` equals the tokenizer's end-of-sequence token id.
pub is_eos: bool,
/// Engine position that was passed to the model's forward call for this
/// step, i.e. the value before the engine advanced it.
pub position: usize,
}
/// Token-by-token generation driver that owns a model and a tokenizer and
/// tracks the decoding position together with the prompt and generated tokens.
pub struct InferenceEngine<M: LocalModel, T: NoosTokenizer> {
/// Model whose forward pass yields the logits handed to the sampler.
model: M,
/// Tokenizer used to encode prompts, decode sampled tokens and identify EOS.
tokenizer: T,
/// Position argument for the next forward call: the prompt length after
/// `set_prompt`, incremented once per generated token, 0 after `reset`.
position: usize,
/// Token ids sampled since the last `set_prompt`/`reset`, in generation
/// order (a sampled EOS token is stored as well).
generated_tokens: Vec<u32>,
/// Token ids of the current prompt; empty until `set_prompt` succeeds.
prompt_tokens: Vec<u32>,
}
impl<M: LocalModel, T: NoosTokenizer> InferenceEngine<M, T> {
    /// Builds an engine around `model` and `tokenizer` with no prompt set,
    /// position 0 and no generated tokens.
    pub fn new(model: M, tokenizer: T) -> Self {
        Self {
            model,
            tokenizer,
            position: 0,
            generated_tokens: Vec::new(),
            prompt_tokens: Vec::new(),
        }
    }

    /// Tokenizes `text`, feeds all of its tokens to the model starting at
    /// position 0 and makes it the active prompt.
    ///
    /// Everything belonging to a previous prompt (model cache, position,
    /// prompt and generated tokens) is discarded before the new prompt is
    /// processed, so the engine can be reused for several prompts.
    ///
    /// # Errors
    ///
    /// * Propagates tokenizer errors; the engine is left untouched.
    /// * Returns `NoosError::Internal` when tokenization yields no tokens; the
    ///   engine is left untouched.
    /// * Propagates model errors from the prompt forward pass; in that case
    ///   the engine is left in the reset state (no prompt set).
    pub fn set_prompt(&mut self, text: &str) -> NoosResult<()> {
        let tokens = self.tokenizer.encode(text, true)?;
        if tokens.is_empty() {
            return Err(NoosError::Internal("Empty prompt after tokenization".into()));
        }

        // Nothing visible here guarantees that a forward call at position 0
        // discards what the model cached for an earlier prompt, so clear it
        // explicitly. This also ensures a failed forward pass cannot leave the
        // old prompt paired with a partially overwritten cache.
        self.reset();

        // NOTE(review): the logits returned by this call are dropped, and the
        // first generation step feeds the last prompt token to the model again
        // at position `tokens.len()`. Confirm this matches the caching
        // contract of the `LocalModel` implementations.
        self.model.forward(&tokens, 0)?;
        self.position = tokens.len();
        self.prompt_tokens = tokens;
        Ok(())
    }

    /// Produces the next token.
    ///
    /// The most recently generated token (or the last prompt token on the
    /// first step) is passed to the model at the current position. The
    /// resulting logits are sampled with a `CognitiveSampler` built from
    /// `cognitive_state`, which also receives the prompt followed by all
    /// generated tokens. The sampled token is recorded and the position
    /// advances by one, including when the token is EOS.
    ///
    /// # Errors
    ///
    /// Returns `NoosError::Internal("No prompt set")` when there is neither a
    /// prompt nor a generated token, and propagates model, sampler and
    /// tokenizer errors. On error no token is recorded and the position is
    /// unchanged.
    pub fn generate_next(
        &mut self,
        cognitive_state: &CognitiveState,
    ) -> NoosResult<GenerationStep> {
        // Latest generated token if any, otherwise the prompt's last token.
        // There is deliberately no fallback token id: having neither is an error.
        let input_token = self
            .generated_tokens
            .last()
            .or_else(|| self.prompt_tokens.last())
            .copied()
            .ok_or_else(|| NoosError::Internal("No prompt set".into()))?;
        let input_tokens = vec![input_token];

        let logits = self.model.forward(&input_tokens, self.position)?;

        let sampler = CognitiveSampler::from_cognitive_state(cognitive_state);
        let all_tokens: Vec<u32> = self
            .prompt_tokens
            .iter()
            .chain(self.generated_tokens.iter())
            .copied()
            .collect();
        let token_id = sampler.sample(&logits, &all_tokens)?;

        let text = self.tokenizer.decode_token(token_id)?;
        let is_eos = token_id == self.tokenizer.eos_token_id();

        // Report the position used for this step, then advance.
        let step_position = self.position;
        self.generated_tokens.push(token_id);
        self.position += 1;

        Ok(GenerationStep {
            token_id,
            text,
            is_eos,
            position: step_position,
        })
    }

    /// Generates up to `max_tokens` tokens and returns their concatenated text.
    ///
    /// Generation stops early when an EOS token is sampled. The EOS token's
    /// text is not appended to the output, but the token itself is still
    /// recorded by [`generate_next`](Self::generate_next).
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by `generate_next`; text produced
    /// before the error is discarded.
    pub fn generate(
        &mut self,
        cognitive_state: &CognitiveState,
        max_tokens: usize,
    ) -> NoosResult<String> {
        let mut output = String::new();
        for _ in 0..max_tokens {
            let step = self.generate_next(cognitive_state)?;
            if step.is_eos {
                break;
            }
            output.push_str(&step.text);
        }
        Ok(output)
    }

    /// Resets the model cache and clears the position, the generated tokens
    /// and the prompt, returning the engine to its freshly constructed state.
    pub fn reset(&mut self) {
        self.model.reset_cache();
        self.position = 0;
        self.generated_tokens.clear();
        self.prompt_tokens.clear();
    }

    /// Tokens generated since the last `set_prompt`/`reset`, in order.
    pub fn generated_tokens(&self) -> &[u32] {
        &self.generated_tokens
    }

    /// Position that will be passed to the model on the next forward call.
    pub fn position(&self) -> usize {
        self.position
    }
}
/// Result of one decoding step produced by
/// `InferenceEngine::generate_next_cognitive`: the sampled token plus what the
/// model reported about delta modulation during the forward pass.
#[derive(Debug, Clone)]
pub struct CognitiveGenerationStep {
/// Id of the token the sampler selected for this step.
pub token_id: u32,
/// Text obtained by decoding `token_id` with the engine's tokenizer.
pub text: String,
/// `true` when `token_id` equals the tokenizer's end-of-sequence token id.
pub is_eos: bool,
/// Engine position that was passed to the model for this step, i.e. the
/// value before the engine advanced it.
pub position: usize,
/// Copied from the model's forward result: whether it reported applying the modulation.
pub modulation_applied: bool,
/// Copied from the model's forward result: the layers it reported as modulated.
pub modulated_layers: Vec<usize>,
/// The modulation computed from the cognitive state and handed to the model.
pub delta_modulation: DeltaModulation,
/// Copied from the model's forward result.
/// NOTE(review): presumably a gate value that only some models expose — confirm against `CognitiveModel`.
pub gate_alpha: Option<f64>,
/// Copied from the model's forward result.
/// NOTE(review): presumably a gate-derived delta gain — confirm against `CognitiveModel`.
pub gate_delta_gain: Option<f64>,
}
impl<M: CognitiveModel, T: NoosTokenizer> InferenceEngine<M, T> {
    /// Produces the next token using the model's cognitive forward pass.
    ///
    /// The delta modulation is computed from `cognitive_state` and the
    /// model's layer count, then passed to the model together with the most
    /// recent token (the last prompt token on the first step). The logits are
    /// sampled with a `CognitiveSampler` built from the same state, the token
    /// is recorded and the position advances by one.
    ///
    /// # Errors
    ///
    /// Returns `NoosError::Internal("No prompt set")` when there is neither a
    /// prompt nor a generated token, and propagates model, sampler and
    /// tokenizer errors. On error no token is recorded.
    pub fn generate_next_cognitive(
        &mut self,
        cognitive_state: &CognitiveState,
    ) -> NoosResult<CognitiveGenerationStep> {
        // Choose what to feed: latest generated token, else the prompt's tail.
        let feed_token = match self.generated_tokens.last() {
            Some(&latest) => latest,
            None => match self.prompt_tokens.last() {
                Some(&tail) => tail,
                None => return Err(NoosError::Internal("No prompt set".into())),
            },
        };
        let feed = vec![feed_token];

        let modulation = compute_delta_modulation(cognitive_state, self.model.num_layers());
        let outcome = self
            .model
            .forward_cognitive(&feed, self.position, &modulation)?;

        // The sampler receives the prompt followed by everything generated so far.
        let mut history =
            Vec::with_capacity(self.prompt_tokens.len() + self.generated_tokens.len());
        history.extend_from_slice(&self.prompt_tokens);
        history.extend_from_slice(&self.generated_tokens);

        let sampler = CognitiveSampler::from_cognitive_state(cognitive_state);
        let token_id = sampler.sample(&outcome.logits, &history)?;

        let text = self.tokenizer.decode_token(token_id)?;
        let is_eos = token_id == self.tokenizer.eos_token_id();

        // Record the token and advance; the step reports the pre-advance position.
        let emitted_at = self.position;
        self.generated_tokens.push(token_id);
        self.position = emitted_at + 1;

        Ok(CognitiveGenerationStep {
            token_id,
            text,
            is_eos,
            position: emitted_at,
            modulation_applied: outcome.modulation_applied,
            modulated_layers: outcome.modulated_layers,
            delta_modulation: modulation,
            gate_alpha: outcome.gate_alpha,
            gate_delta_gain: outcome.gate_delta_gain,
        })
    }

    /// Generates up to `max_tokens` tokens through the cognitive path and
    /// returns their concatenated text.
    ///
    /// Stops early when an EOS token is sampled; the EOS text is not part of
    /// the returned string. The first error from a step is propagated.
    pub fn generate_cognitive(
        &mut self,
        cognitive_state: &CognitiveState,
        max_tokens: usize,
    ) -> NoosResult<String> {
        let mut transcript = String::new();
        let mut budget = max_tokens;
        while budget > 0 {
            budget -= 1;
            let step = self.generate_next_cognitive(cognitive_state)?;
            if step.is_eos {
                break;
            }
            transcript.push_str(step.text.as_str());
        }
        Ok(transcript)
    }

    /// Number of layers reported by the underlying model.
    pub fn model_num_layers(&self) -> usize {
        self.model.num_layers()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::inference::cognitive_model::tests::MockCognitiveModel;
    use crate::inference::model::tests::MockModel;
    use crate::inference::tokenizer::tests::MockTokenizer;
    use crate::types::world::GainMode;

    /// Engine backed by the plain mock model and mock tokenizer.
    fn make_engine() -> InferenceEngine<MockModel, MockTokenizer> {
        let model = MockModel::new(100);
        let tokenizer = MockTokenizer::new(100);
        InferenceEngine::new(model, tokenizer)
    }

    /// Engine backed by the cognitive mock model and mock tokenizer.
    fn make_cognitive_engine() -> InferenceEngine<MockCognitiveModel, MockTokenizer> {
        let model = MockCognitiveModel::new(100, 64);
        let tokenizer = MockTokenizer::new(100);
        InferenceEngine::new(model, tokenizer)
    }

    /// Default cognitive state with only the gain mode overridden.
    fn state_in(mode: GainMode) -> CognitiveState {
        CognitiveState {
            gain_mode: mode,
            ..CognitiveState::default()
        }
    }

    // ---- plain generation path ----

    #[test]
    fn generate_next_produces_token() {
        let mut engine = make_engine();
        engine.set_prompt("hello").unwrap();

        let state = CognitiveState::default();
        let produced = engine.generate_next(&state).unwrap();

        assert!(!produced.text.is_empty());
        assert_eq!(engine.generated_tokens().len(), 1);
    }

    #[test]
    fn generate_respects_max_tokens() {
        let mut engine = make_engine();
        engine.set_prompt("hello").unwrap();

        let state = CognitiveState::default();
        let _text = engine.generate(&state, 5).unwrap();

        // Fewer tokens are possible if EOS is sampled early.
        assert!(engine.generated_tokens().len() <= 5);
    }

    #[test]
    fn position_advances() {
        let mut engine = make_engine();
        engine.set_prompt("hi").unwrap();
        let start = engine.position();

        for taken in 1..=2 {
            engine.generate_next(&CognitiveState::default()).unwrap();
            assert_eq!(engine.position(), start + taken);
        }
    }

    #[test]
    fn reset_clears_state() {
        let mut engine = make_engine();
        engine.set_prompt("hello").unwrap();
        engine.generate_next(&CognitiveState::default()).unwrap();

        engine.reset();

        assert_eq!(engine.position(), 0);
        assert!(engine.generated_tokens().is_empty());
    }

    #[test]
    fn empty_prompt_returns_error() {
        let mut engine = make_engine();
        let outcome = engine.set_prompt("");
        assert!(outcome.is_err());
    }

    #[test]
    fn generate_without_prompt_returns_error() {
        let mut engine = make_engine();
        let outcome = engine.generate_next(&CognitiveState::default());
        assert!(outcome.is_err());
    }

    #[test]
    fn cognitive_state_affects_sampling() {
        let mut phasic_engine = make_engine();
        let mut tonic_engine = make_engine();
        phasic_engine.set_prompt("test").unwrap();
        tonic_engine.set_prompt("test").unwrap();

        let phasic = state_in(GainMode::Phasic);
        let tonic = state_in(GainMode::Tonic);

        let phasic_step = phasic_engine.generate_next(&phasic).unwrap();
        let tonic_step = tonic_engine.generate_next(&tonic).unwrap();

        // Only checks that both modes yield an in-vocabulary token.
        assert!(phasic_step.token_id < 100);
        assert!(tonic_step.token_id < 100);
    }

    // ---- cognitive generation path ----

    #[test]
    fn generate_next_cognitive_produces_token() {
        let mut engine = make_cognitive_engine();
        engine.set_prompt("hello").unwrap();

        let state = CognitiveState::default();
        let produced = engine.generate_next_cognitive(&state).unwrap();

        assert!(!produced.text.is_empty());
        assert_eq!(engine.generated_tokens().len(), 1);
    }

    #[test]
    fn cognitive_generation_applies_delta_modulation() {
        let mut engine = make_cognitive_engine();
        engine.set_prompt("test").unwrap();

        let produced = engine
            .generate_next_cognitive(&state_in(GainMode::Phasic))
            .unwrap();

        assert!(
            produced.modulation_applied,
            "Phasic mode should trigger delta modulation"
        );
        assert!(
            produced.delta_modulation.gain_factor < 1.0,
            "Phasic should reduce delta (compensatory retention)"
        );
        assert!(
            !produced.modulated_layers.is_empty(),
            "Should have modulated mid-layers"
        );
    }

    #[test]
    fn neutral_cognitive_generation_no_modulation() {
        let mut engine = make_cognitive_engine();
        engine.set_prompt("test").unwrap();

        let produced = engine
            .generate_next_cognitive(&CognitiveState::default())
            .unwrap();

        assert!(
            !produced.modulation_applied,
            "Neutral state (gain=1.0) should not modulate"
        );
    }

    #[test]
    fn cognitive_and_sampling_stack() {
        let mut engine = make_cognitive_engine();
        engine.set_prompt("test").unwrap();

        let produced = engine
            .generate_next_cognitive(&state_in(GainMode::Tonic))
            .unwrap();

        assert!(
            produced.delta_modulation.gain_factor < 1.0,
            "Tonic should reduce delta (Tầng 2)"
        );
        assert!(produced.token_id < 100);
    }

    #[test]
    fn generate_cognitive_multiple_tokens() {
        let mut engine = make_cognitive_engine();
        engine.set_prompt("hello").unwrap();

        let state = CognitiveState::default();
        let text = engine.generate_cognitive(&state, 3).unwrap();

        assert!(!text.is_empty());
        assert!(engine.generated_tokens().len() <= 3);
    }

    #[test]
    fn model_num_layers_accessible() {
        let engine = make_cognitive_engine();
        assert_eq!(engine.model_num_layers(), 64);
    }
}