pub mod file_loader;
use anyhow::Result;
use moe_core::core::inference::InferenceEngine;
use file_loader::{load_expert_bank, EXPERT_INPUT_DIM, EXPERT_OUTPUT_DIM};
/// A model that is ready for inference: the engine plus the metadata describing it.
///
/// Values are produced by `Platform::load_model` and `Platform::load_model_from_file`.
pub struct LoadedModel {
/// Engine whose `forward` method is invoked during inference; visible only inside this crate.
pub(crate) engine: InferenceEngine,
/// Model identifier: the id passed to `load_model`, or the source-model name reported by the loaded file.
pub model_id: String,
/// Length of the vector a prompt is encoded into (the loaders in this file set it to `EXPERT_INPUT_DIM`).
pub input_dim: usize,
/// Output dimension recorded at load time (the loaders in this file set it to `EXPERT_OUTPUT_DIM`).
pub output_dim: usize,
}
/// Outcome of a single `Platform::run_inference` call.
///
/// Derives `Debug`, `Clone` and `PartialEq` so results can be logged, copied and
/// compared directly (e.g. in determinism checks).
#[derive(Debug, Clone, PartialEq)]
pub struct InferenceResult {
    /// Ternary verdict: `1` (affirm), `-1` (reject) or `0` (hold).
    pub trit_verdict: i8,
    /// Magnitude of the mean output activation, capped at `1.0`.
    pub confidence: f32,
    /// Raw output vector returned by the engine's forward pass.
    pub output_vec: Vec<f32>,
    /// Single-line human-readable summary: model, kernel, dims, verdict and confidence.
    pub routing_summary: String,
}
/// Stateless entry point for loading models and running inference on them.
///
/// `Default` is implemented by hand elsewhere in this file, so it is not derived here.
#[derive(Debug, Clone)]
pub struct Platform;
impl Platform {
pub fn new() -> Self {
Self
}
pub fn load_model(&self, model_id: &str) -> Result<LoadedModel> {
Ok(LoadedModel {
engine: InferenceEngine::new(
format!("epis-v1.0/{}", model_id),
EXPERT_INPUT_DIM,
EXPERT_OUTPUT_DIM,
),
model_id: model_id.to_string(),
input_dim: EXPERT_INPUT_DIM,
output_dim: EXPERT_OUTPUT_DIM,
})
}
pub fn load_model_from_file(&self, path: &str) -> Result<LoadedModel> {
let (expert_bank, info) = load_expert_bank(path)?;
log::info!(
"Loaded '{}' — {} layers → 13 experts | sparsity {:.1}% | ᾱ={:.4}",
info.source_model,
info.num_layers,
info.sparsity * 100.0,
info.mean_alpha,
);
let mut engine = InferenceEngine::new(
format!("epis-v1.0/{}", info.source_model),
EXPERT_INPUT_DIM,
EXPERT_OUTPUT_DIM,
);
engine.expert_bank = expert_bank;
Ok(LoadedModel {
engine,
model_id: info.source_model,
input_dim: EXPERT_INPUT_DIM,
output_dim: EXPERT_OUTPUT_DIM,
})
}
pub fn run_inference(&self, model: &LoadedModel, prompt: &str) -> Result<InferenceResult> {
let mut input = encode_prompt(prompt, model.input_dim);
let output = model.engine.forward(&mut input)?;
Ok(decode_result(output, model))
}
}
impl Default for Platform {
fn default() -> Self {
Self::new()
}
}
/// Encodes `prompt` as a vector of `dim` floats scaled to unit Euclidean length.
///
/// Each byte of the prompt is mapped to `(byte - 128) / 128` and added to slot
/// `index % dim`, so prompts longer than `dim` wrap around and accumulate. The
/// vector is then divided by its Euclidean norm.
///
/// Edge cases:
/// * `dim == 0` yields an empty vector (previously this panicked with a
///   remainder-by-zero for any non-empty prompt).
/// * An empty prompt yields `dim` zeros; the norm is clamped to `1e-9` so the
///   division never produces NaN.
fn encode_prompt(prompt: &str, dim: usize) -> Vec<f32> {
    // Nothing to fill, and `i % 0` below would panic.
    if dim == 0 {
        return Vec::new();
    }

    let mut features = vec![0.0f32; dim];
    for (i, byte) in prompt.bytes().enumerate() {
        // `f32::from(u8)` is lossless and equivalent to the `as` cast.
        features[i % dim] += (f32::from(byte) - 128.0) / 128.0;
    }

    // Clamp the norm so an all-zero vector stays all-zero instead of becoming NaN.
    let norm = features.iter().map(|x| x * x).sum::<f32>().sqrt().max(1e-9);
    for value in &mut features {
        *value /= norm;
    }
    features
}
/// Turns the engine's raw output vector into an [`InferenceResult`].
///
/// * The verdict is `1` when the mean activation is above `0.05`, `-1` when it
///   is below `-0.05`, and `0` otherwise.
/// * The confidence is the absolute mean, capped at `1.0`.
/// * `routing_summary` is a one-line description built from `model`'s id, the
///   engine's `kernel_version`, the recorded dimensions, the verdict label and
///   the confidence.
///
/// An empty `output` is treated as a mean of `0.0`, and a NaN mean yields a
/// confidence of `0.0`. In both cases the verdict is `0` ("hold").
fn decode_result(output: Vec<f32>, model: &LoadedModel) -> InferenceResult {
    // Dividing by a zero length would give 0.0 / 0.0 = NaN; use a neutral mean instead.
    let mean = if output.is_empty() {
        0.0
    } else {
        output.iter().sum::<f32>() / output.len() as f32
    };

    let trit_verdict: i8 = if mean > 0.05 {
        1
    } else if mean < -0.05 {
        -1
    } else {
        0
    };

    // `f32::min` ignores a NaN operand, so `NaN.abs().min(1.0)` would report
    // full confidence for an undefined mean. Report none instead.
    let confidence = if mean.is_nan() {
        0.0
    } else {
        mean.abs().min(1.0)
    };

    let verdict_label = match trit_verdict {
        1 => "affirm (+1)",
        -1 => "reject (-1)",
        _ => "hold ( 0)",
    };
    let routing_summary = format!(
        "model={} | kernel={} | dims={}→{} | verdict={} | confidence={:.3}",
        model.model_id,
        model.engine.kernel_version,
        model.input_dim,
        model.output_dim,
        verdict_label,
        confidence,
    );

    InferenceResult {
        trit_verdict,
        confidence,
        output_vec: output,
        routing_summary,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a platform together with the in-memory `test-epis` model used by most tests.
    fn platform_with_test_model() -> (Platform, LoadedModel) {
        let platform = Platform::new();
        let model = platform.load_model("test-epis").unwrap();
        (platform, model)
    }

    /// A loaded model yields a valid trit, a confidence in `[0, 1]` and an output of the recorded size.
    #[test]
    fn test_load_and_infer() {
        let (platform, model) = platform_with_test_model();
        let outcome = platform.run_inference(&model, "Should we proceed?").unwrap();
        assert!(matches!(outcome.trit_verdict, -1..=1));
        assert!((0.0..=1.0).contains(&outcome.confidence));
        assert_eq!(outcome.output_vec.len(), model.output_dim);
    }

    /// Running the same prompt twice gives the same verdict and the same activations.
    #[test]
    fn test_epis_determinism() {
        let (platform, model) = platform_with_test_model();
        let question = "Is this action safe?";
        let first = platform.run_inference(&model, question).unwrap();
        let second = platform.run_inference(&model, question).unwrap();
        assert_eq!(first.trit_verdict, second.trit_verdict,
            "EPIS must produce identical verdicts for identical input");
        assert_eq!(first.output_vec, second.output_vec,
            "EPIS must produce identical activations for identical input");
    }

    /// Two distinct prompts are required to produce distinct activations.
    #[test]
    fn test_different_prompts_may_differ() {
        let (platform, model) = platform_with_test_model();
        let go = platform.run_inference(&model, "proceed").unwrap();
        let stop = platform.run_inference(&model, "abort").unwrap();
        assert_ne!(go.output_vec, stop.output_vec,
            "Different prompts must produce different activations");
    }

    /// Smoke test against a real `.tern.bin` file; prints a notice and passes when none is present.
    #[test]
    fn test_load_from_file_if_available() {
        let candidates = [
            "/home/eri-irfos/llama32-1b.tern.bin",
            "/home/eri-irfos/Desktop/llama32-1b.tern.bin",
        ];
        match candidates.iter().find(|candidate| std::path::Path::new(candidate).exists()) {
            Some(found) => {
                let platform = Platform::new();
                let model = platform.load_model_from_file(found).unwrap();
                let outcome = platform.run_inference(&model, "What is ternary logic?").unwrap();
                assert!(matches!(outcome.trit_verdict, -1..=1));
                println!("✓ Real model loaded: {}", outcome.routing_summary);
            }
            None => {
                println!("⚠ No .tern.bin found — skipping file-load smoke test");
                println!(" Run: python3 scripts/transmute_llama.py to generate one");
            }
        }
    }
}