pub mod reliable;
/// Architecture hyperparameters for a Qwen2-family transformer.
#[derive(Debug, Clone)]
pub struct Qwen2Config {
    /// Width of the residual stream (model dimension).
    pub hidden_size: usize,
    /// Number of query attention heads.
    pub num_attention_heads: usize,
    /// Number of key/value heads (grouped-query attention when smaller
    /// than `num_attention_heads`).
    pub num_kv_heads: usize,
    /// Number of transformer layers.
    pub num_layers: usize,
    /// Token vocabulary size.
    pub vocab_size: usize,
    /// Maximum supported context length, in tokens.
    pub max_seq_len: usize,
    /// Width of the feed-forward hidden layer.
    pub intermediate_size: usize,
    /// Base frequency for rotary position embeddings.
    pub rope_theta: f64,
}

impl Default for Qwen2Config {
    /// The default configuration is Qwen2-0.5B-Instruct.
    fn default() -> Self {
        Self::qwen2_0_5b_instruct()
    }
}

impl Qwen2Config {
    /// Vocabulary size shared by the Qwen2 model family.
    pub const VOCAB_SIZE: usize = 151_936;

    /// Configuration for Qwen2-0.5B-Instruct.
    #[must_use]
    pub const fn qwen2_0_5b_instruct() -> Self {
        Self {
            hidden_size: 896,
            num_attention_heads: 14,
            num_kv_heads: 2,
            num_layers: 24,
            vocab_size: Self::VOCAB_SIZE,
            max_seq_len: 32768,
            intermediate_size: 4864,
            rope_theta: 1_000_000.0,
        }
    }

    /// Qwen2.5-Coder-0.5B-Instruct shares the 0.5B architecture.
    #[must_use]
    pub const fn qwen25_coder_0_5b_instruct() -> Self {
        Self::qwen2_0_5b_instruct()
    }

    /// Estimated fp16 weight footprint, in bytes.
    ///
    /// Counts the token embedding, per-layer attention and FFN projection
    /// matrices, and an untied LM head, at 2 bytes per parameter.
    ///
    /// NOTE(review): attention is costed as four `hidden x hidden` matrices,
    /// which overestimates the K/V projections under GQA — confirm this
    /// approximation is intentional.
    #[must_use]
    pub fn model_size_fp16(&self) -> usize {
        let bytes_per_param = 2;
        let embedding = self.vocab_size * self.hidden_size * bytes_per_param;
        // Q, K, V, O projections per layer.
        let attention = self.hidden_size * self.hidden_size * 4 * bytes_per_param;
        // Gate, up, down projections per layer.
        let ffn = self.hidden_size * self.intermediate_size * 3 * bytes_per_param;
        let all_layers = (attention + ffn) * self.num_layers;
        let lm_head = self.vocab_size * self.hidden_size * bytes_per_param;
        embedding + all_layers + lm_head
    }

    /// Estimated int4 weight footprint, in bytes (fp16 size at 4x compression).
    #[must_use]
    pub fn model_size_int4(&self) -> usize {
        self.model_size_fp16() / 4
    }

    /// KV-cache size in bytes for `seq_len` cached positions:
    /// K and V tensors, fp16, per layer and per KV head.
    #[must_use]
    pub fn kv_cache_size(&self, seq_len: usize) -> usize {
        let head_dim = self.hidden_size / self.num_attention_heads;
        2 * self.num_layers * self.num_kv_heads * seq_len * head_dim * 2
    }
}
/// Lightweight tokenizer descriptor: vocabulary size plus the special-token
/// ids used for stop detection and chat formatting.
#[derive(Debug, Clone)]
pub struct Qwen2Tokenizer {
// Number of vocabulary entries; `new()` fills this with Qwen2Config::VOCAB_SIZE.
pub vocab_size: usize,
// BOS/EOS/pad and ChatML marker ids; `new()` fills this with SpecialTokens::qwen2().
pub special_tokens: SpecialTokens,
}
/// Special-token ids for one tokenizer family.
///
/// Families without ChatML-style markers use 0 for `im_start_id`/`im_end_id`.
#[derive(Debug, Clone)]
pub struct SpecialTokens {
    /// Beginning-of-sequence token id.
    pub bos_id: u32,
    /// End-of-sequence token id.
    pub eos_id: u32,
    /// Padding token id.
    pub pad_id: u32,
    /// `<|im_start|>`-style chat marker id.
    pub im_start_id: u32,
    /// `<|im_end|>`-style chat marker id.
    pub im_end_id: u32,
}

impl SpecialTokens {
    /// Ids used by the `qwen2` / `qwen3` architectures.
    #[must_use]
    pub const fn qwen2() -> Self {
        Self {
            bos_id: 151_643,
            eos_id: 151_645,
            pad_id: 151_643,
            im_start_id: 151_644,
            im_end_id: 151_645,
        }
    }

    /// Ids used by the `qwen3_5` architecture.
    #[must_use]
    pub const fn qwen3_5() -> Self {
        Self {
            bos_id: 0,
            eos_id: 248_044,
            pad_id: 0,
            im_start_id: 0,
            im_end_id: 0,
        }
    }

    /// Ids used by the `llama` architecture.
    #[must_use]
    pub const fn llama() -> Self {
        Self {
            bos_id: 128_000,
            eos_id: 128_001,
            pad_id: 128_001,
            im_start_id: 0,
            im_end_id: 0,
        }
    }

    /// Ids used by the `mistral` architecture.
    #[must_use]
    pub const fn mistral() -> Self {
        Self {
            bos_id: 1,
            eos_id: 2,
            pad_id: 0,
            im_start_id: 0,
            im_end_id: 0,
        }
    }

    /// Ids used by the `gemma` / `gemma2` architectures.
    #[must_use]
    pub const fn gemma() -> Self {
        Self {
            bos_id: 2,
            eos_id: 1,
            pad_id: 0,
            im_start_id: 0,
            im_end_id: 0,
        }
    }

    /// Ids used by the `deepseek` / `deepseek2` architectures.
    #[must_use]
    pub const fn deepseek() -> Self {
        Self {
            bos_id: 0,
            eos_id: 1,
            pad_id: 0,
            im_start_id: 0,
            im_end_id: 0,
        }
    }

    /// Ids used by the `phi3` architecture.
    #[must_use]
    pub const fn phi3() -> Self {
        Self {
            bos_id: 1,
            eos_id: 32_000,
            pad_id: 32_000,
            im_start_id: 0,
            im_end_id: 0,
        }
    }

    /// Ids used by the `phi` / `phi2` architectures.
    #[must_use]
    pub const fn phi2() -> Self {
        Self {
            bos_id: 0,
            eos_id: 50_256,
            pad_id: 50_256,
            im_start_id: 0,
            im_end_id: 0,
        }
    }

    /// Ids used by the `gpt2` architecture (same values as phi2).
    #[must_use]
    pub const fn gpt2() -> Self {
        Self {
            bos_id: 0,
            eos_id: 50_256,
            pad_id: 50_256,
            im_start_id: 0,
            im_end_id: 0,
        }
    }

    /// Looks up the id set for an architecture string, or `None` when the
    /// architecture is not in this table.
    #[must_use]
    pub fn from_architecture(arch: &str) -> Option<Self> {
        let tokens = match arch {
            "qwen2" | "qwen3" | "qwen3moe" => Self::qwen2(),
            "qwen3_5" => Self::qwen3_5(),
            "llama" => Self::llama(),
            "mistral" => Self::mistral(),
            "gemma" | "gemma2" => Self::gemma(),
            "deepseek" | "deepseek2" => Self::deepseek(),
            "phi3" => Self::phi3(),
            "phi" | "phi2" => Self::phi2(),
            "gpt2" => Self::gpt2(),
            _ => return None,
        };
        Some(tokens)
    }
}
impl Qwen2Tokenizer {
    /// Builds a descriptor with the Qwen2 vocabulary size and special tokens.
    #[must_use]
    pub fn new() -> Self {
        Self {
            vocab_size: Qwen2Config::VOCAB_SIZE,
            special_tokens: SpecialTokens::qwen2(),
        }
    }

    /// True when `token_id` should terminate generation (EOS or chat-end marker).
    #[must_use]
    pub fn is_eos(&self, token_id: u32) -> bool {
        let specials = &self.special_tokens;
        token_id == specials.eos_id || token_id == specials.im_end_id
    }

    /// True when `token_id` is any of the declared special tokens.
    #[must_use]
    pub fn is_special(&self, token_id: u32) -> bool {
        let s = &self.special_tokens;
        [s.bos_id, s.eos_id, s.pad_id, s.im_start_id, s.im_end_id].contains(&token_id)
    }

    /// Wraps `instruction` in ChatML turn markers, leaving the assistant
    /// turn open so generation continues from there.
    #[must_use]
    pub fn format_instruction(&self, instruction: &str) -> String {
        format!("<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n")
    }
}

impl Default for Qwen2Tokenizer {
    fn default() -> Self {
        Self::new()
    }
}
/// Metrics captured during a demo generation run.
#[derive(Debug, Clone, Default)]
pub struct DemoMetrics {
    /// Model load time, in milliseconds.
    pub load_time_ms: u64,
    /// Latency to the first generated token, in milliseconds.
    pub first_token_ms: u64,
    /// Sustained decode throughput, tokens per second.
    pub tokens_per_sec: f64,
    /// Peak memory observed, in bytes.
    pub peak_memory_bytes: usize,
    /// Total tokens produced.
    pub tokens_generated: usize,
}

impl DemoMetrics {
    /// Checks the demo acceptance targets: under 5 s load, under 2 s to
    /// first token, at least 15 tok/s, and under 512 MiB peak memory.
    #[must_use]
    pub fn meets_targets(&self) -> bool {
        const MAX_LOAD_MS: u64 = 5000;
        const MAX_FIRST_TOKEN_MS: u64 = 2000;
        const MIN_TOKENS_PER_SEC: f64 = 15.0;
        const MAX_PEAK_BYTES: usize = 512 * 1024 * 1024;
        self.load_time_ms < MAX_LOAD_MS
            && self.first_token_ms < MAX_FIRST_TOKEN_MS
            && self.tokens_per_sec >= MIN_TOKENS_PER_SEC
            && self.peak_memory_bytes < MAX_PEAK_BYTES
    }
}
/// Weight quantization schemes supported by the demo.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuantizationType {
    Int4,
    Int8,
    Fp16,
    Fp32,
}

impl QuantizationType {
    /// Bits stored per weight under this scheme.
    ///
    /// Made `const` for consistency with the rest of this file, where pure
    /// lookups (e.g. the `SpecialTokens` constructors) are `const fn`.
    #[must_use]
    pub const fn bits(&self) -> usize {
        match self {
            Self::Int4 => 4,
            Self::Int8 => 8,
            Self::Fp16 => 16,
            Self::Fp32 => 32,
        }
    }

    /// Compression ratio relative to fp32 weights (e.g. 8.0 for int4).
    #[must_use]
    pub fn compression_ratio(&self) -> f64 {
        32.0 / self.bits() as f64
    }
}
/// Gate that decides whether a quantized model's perplexity is close enough
/// to the fp16 baseline.
#[derive(Debug)]
pub struct PerplexityChecker {
    /// Reference (unquantized) perplexity.
    pub baseline_ppl: f64,
    /// Maximum tolerated perplexity increase, in percent.
    pub max_degradation_pct: f64,
}

impl PerplexityChecker {
    /// Creates a checker that tolerates up to 15% perplexity degradation
    /// relative to `baseline_ppl`.
    #[must_use]
    pub fn new(baseline_ppl: f64) -> Self {
        Self {
            baseline_ppl,
            max_degradation_pct: 15.0,
        }
    }

    /// True when `quantized_ppl` degrades no more than the allowed percentage.
    #[must_use]
    pub fn is_acceptable(&self, quantized_ppl: f64) -> bool {
        // Delegate to degradation_pct so the formula lives in exactly one
        // place (previously it was duplicated in both methods).
        self.degradation_pct(quantized_ppl) <= self.max_degradation_pct
    }

    /// Relative perplexity increase over the baseline, in percent.
    /// Negative when the quantized model is better; non-finite when
    /// `baseline_ppl` is zero.
    #[must_use]
    pub fn degradation_pct(&self, quantized_ppl: f64) -> f64 {
        ((quantized_ppl - self.baseline_ppl) / self.baseline_ppl) * 100.0
    }
}
/// Minimum browser versions the demo supports.
#[derive(Debug, Clone)]
pub struct BrowserCompatibility {
    /// Minimum supported Chrome major version.
    pub chrome_min: u32,
    /// Minimum supported Firefox major version.
    pub firefox_min: u32,
    /// Minimum supported Safari major version.
    pub safari_min: u32,
}

impl Default for BrowserCompatibility {
    /// Defaults: Chrome 120, Firefox 120, Safari 17.
    fn default() -> Self {
        Self {
            chrome_min: 120,
            firefox_min: 120,
            safari_min: 17,
        }
    }
}

impl BrowserCompatibility {
    /// True when Chrome `version` meets the minimum requirement.
    #[must_use]
    pub fn supports_chrome(&self, version: u32) -> bool {
        self.chrome_min <= version
    }

    /// True when Firefox `version` meets the minimum requirement.
    #[must_use]
    pub fn supports_firefox(&self, version: u32) -> bool {
        self.firefox_min <= version
    }

    /// True when Safari `version` meets the minimum requirement.
    #[must_use]
    pub fn supports_safari(&self, version: u32) -> bool {
        self.safari_min <= version
    }
}
#[cfg(test)]
#[path = "demo_tests.rs"]
mod tests;