//! Test fixtures shared across the HTTP handler tests: router builders backed
//! by demo/mock state, and helpers for detecting mock placeholder responses.

use super::*;
use axum::http::StatusCode;
use axum::Router;
use std::sync::OnceLock;

/// Returns early from the enclosing test function when `$response` has a
/// `404 Not Found` status, the marker this suite uses for mock placeholder
/// responses (see [`is_mock_response`]).
#[macro_export]
macro_rules! guard_mock_response {
    ($response:expr) => {
        if $response.status() == axum::http::StatusCode::NOT_FOUND {
            return;
        }
    };
}
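
// A minimal usage sketch for the guard, assuming a handler test that obtained
// `response` via `tower::ServiceExt::oneshot` (any way of producing a
// `Response` works; the final assertion is illustrative):
//
//     let response = app.oneshot(request).await.unwrap();
//     guard_mock_response!(response); // skip the test body if the mock 404s
//     assert_eq!(response.status(), StatusCode::OK);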

/// Expression form of [`guard_mock_response!`]: `true` when `status` marks a
/// mock placeholder response.
pub fn is_mock_response(status: StatusCode) -> bool {
    status == StatusCode::NOT_FOUND
}
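
// Call sites that need the check as an expression rather than a statement-level
// early return can use the predicate form directly:
//
//     if is_mock_response(response.status()) {
//         return;
//     }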

/// Mock state built once per process and cloned into each test router, so the
/// setup cost of [`AppState::demo_mock`] is paid only once.
static SHARED_APP_STATE: OnceLock<AppState> = OnceLock::new();

fn get_shared_state() -> &'static AppState {
    SHARED_APP_STATE
        .get_or_init(|| AppState::demo_mock().expect("Failed to create shared mock AppState"))
}

/// Builds a test router backed by the shared mock [`AppState`]; cheap to call
/// repeatedly because the underlying state is initialized only once.
pub fn create_test_app_shared() -> Router {
    let state = get_shared_state().clone();
    create_router(state)
}
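
// Sketch of exercising the shared app, assuming `tower::ServiceExt` (for
// `oneshot`), `axum::http::Request`, and `axum::body::Body` are in scope; the
// "/health" path is illustrative, not a guaranteed route:
//
//     let app = create_test_app_shared();
//     let request = Request::builder().uri("/health").body(Body::empty()).unwrap();
//     let response = app.oneshot(request).await.unwrap();
//     guard_mock_response!(response);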

/// Builds a test router with a fresh demo [`AppState`], for tests that must
/// not observe state shared with other tests.
pub fn create_test_app() -> Router {
    let state = AppState::demo().expect("Failed to create demo AppState");
    create_router(state)
}

/// Builds a placeholder `OwnedQuantizedModel` whose Q4_K tensors are shaped
/// from `config`, for exercising GPU code paths without loading a real GGUF
/// file.
#[cfg(feature = "gpu")]
pub fn create_test_quantized_model(
    config: &crate::gguf::GGUFConfig,
) -> crate::gguf::OwnedQuantizedModel {
    use crate::gguf::{
        OwnedQKVWeights, OwnedQuantizedLayer, OwnedQuantizedModel, OwnedQuantizedTensor,
        GGUF_TYPE_Q4_K,
    };

    let hidden_dim = config.hidden_dim;
    let intermediate_dim = config.intermediate_dim;
    let vocab_size = config.vocab_size;

    // Builds a zero-filled Q4_K tensor of the given shape. Q4_K packs weights
    // into super-blocks of 256 values at 144 bytes each, so one row of
    // `in_dim` weights takes ceil(in_dim / 256) * 144 bytes.
    fn create_q4k_data(in_dim: usize, out_dim: usize) -> OwnedQuantizedTensor {
        let super_blocks_per_row = in_dim.div_ceil(256);
        let bytes_per_row = super_blocks_per_row * 144;
        let data_size = out_dim * bytes_per_row;
        OwnedQuantizedTensor {
            data: vec![0u8; data_size],
            qtype: GGUF_TYPE_Q4_K,
            in_dim,
            out_dim,
        }
    }
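
    // Worked example of the sizing above: with in_dim = 512 and out_dim = 4,
    // a row spans ceil(512 / 256) = 2 super-blocks = 2 * 144 = 288 bytes, so
    // the tensor allocates 4 * 288 = 1152 bytes of zeroed Q4_K data.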

    let constraints = &config.constraints;

    // Gated FFN variants carry an extra gate projection alongside up/down.
    let ffn_gate_weight = if constraints.has_gate_ffn() {
        Some(create_q4k_data(hidden_dim, intermediate_dim))
    } else {
        None
    };
    let ffn_norm_weight = if constraints.uses_rmsnorm() || !constraints.has_gate_ffn() {
        Some(vec![1.0f32; hidden_dim])
    } else {
        None
    };
    // Non-RMSNorm (LayerNorm-style) models also need norm bias vectors.
    let attn_norm_bias = if !constraints.uses_rmsnorm() {
        Some(vec![0.0f32; hidden_dim])
    } else {
        None
    };

    let layers = (0..config.num_layers)
        .map(|_| OwnedQuantizedLayer {
            attn_norm_weight: vec![1.0f32; hidden_dim],
            attn_norm_bias: attn_norm_bias.clone(),
            qkv_weight: OwnedQKVWeights::Fused(create_q4k_data(hidden_dim, hidden_dim * 3)),
            qkv_bias: None,
            attn_output_weight: create_q4k_data(hidden_dim, hidden_dim),
            attn_output_bias: None,
            ffn_up_weight: create_q4k_data(hidden_dim, intermediate_dim),
            ffn_up_bias: None,
            ffn_down_weight: create_q4k_data(intermediate_dim, hidden_dim),
            ffn_down_bias: None,
            ffn_gate_weight: ffn_gate_weight.clone(),
            ffn_gate_bias: None,
            ffn_norm_weight: ffn_norm_weight.clone(),
            ffn_norm_bias: None,
            attn_q_norm_weight: None,
            attn_k_norm_weight: None,
        })
        .collect();

    let output_norm_bias = if !constraints.uses_rmsnorm() {
        Some(vec![0.0f32; hidden_dim])
    } else {
        None
    };

    OwnedQuantizedModel {
        config: config.clone(),
        token_embedding: vec![0.1f32; vocab_size * hidden_dim],
        position_embedding: None,
        layers,
        encoder_layers: vec![],
        encoder_output_norm_weight: None,
        encoder_output_norm_bias: None,
        output_norm_weight: vec![1.0f32; hidden_dim],
        output_norm_bias,
        lm_head_weight: create_q4k_data(hidden_dim, vocab_size),
        lm_head_bias: None,
        #[cfg(feature = "cuda")]
        cuda_executor: None,
        #[cfg(feature = "cuda")]
        cuda_kernel_count: std::sync::atomic::AtomicU64::new(0),
        #[cfg(feature = "cuda")]
        cached_weight_names: std::sync::Mutex::new(std::collections::HashSet::new()),
    }
}
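
// Sketch of using the fixture from a GPU-gated test; `test_config()` is a
// hypothetical helper standing in for however the suite builds a `GGUFConfig`:
//
//     #[cfg(feature = "gpu")]
//     #[test]
//     fn quantized_fixture_has_expected_shape() {
//         let config = test_config();
//         let model = create_test_quantized_model(&config);
//         assert_eq!(model.layers.len(), config.num_layers);
//     }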