Docs.rs
realizar-0.8.5
docs.rs
About docs.rs
Badges
Builds
Metadata
Shorthand URLs
Download
Rustdoc JSON
Build queue
Privacy policy
Rust
Rust website
The Book
Standard Library API Reference
Rust by Example
The Cargo Guide
Clippy Documentation
realizar 0.8.5
Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors
Crate
Source
Builds
Feature flags
..
api_server.rs
apr_benchmark.rs
apr_gpu_benchmark.rs
apr_loading.rs
apr_mmap_loading.rs
attention_manual_verify.rs
bench_apr_gpu.rs
bench_apr_vs_gguf.rs
bench_barrier_overhead.rs
bench_batched_forward.rs
bench_batched_gemv.rs
bench_chunk_sizes.rs
bench_chunked_matmul.rs
bench_comparison.rs
bench_continuous_batching.rs
bench_cpu_util.rs
bench_flash_decoding.rs
bench_forward.rs
bench_gemv.rs
bench_kernel_variants.rs
bench_manual_threads.rs
bench_matmul_breakdown.rs
bench_matmul_only.rs
bench_multisequence_graph.rs
bench_q4k_detect.rs
bench_q4k_parallel.rs
bench_q4k_simd.rs
bench_q8k_speedup.rs
bench_qwen.rs
bench_rayon_overhead.rs
bench_realistic.rs
bench_scaling.rs
bench_scratch.rs
bench_seq_vs_par.rs
bench_simd_dot.rs
bench_speculative.rs
bench_tiled_q4k.rs
bench_toks.rs
bench_v2_kernel.rs
bench_vnni_vs_avx2.rs
benchmark_cpu.rs
brick_divergence_trace.rs
chat_template.rs
check_apr_tensors.rs
check_arch_detection.rs
check_bias.rs
check_bias_format.rs
check_bias_raw.rs
check_biases.rs
check_dims.rs
check_embed.rs
check_embed_layout.rs
check_embedding.rs
check_embeddings.rs
check_ffn_down_col_5475.rs
check_ffn_down_types.rs
check_ffn_down_weight.rs
check_ffn_hidden.rs
check_final_hidden.rs
check_forward.rs
check_gate_up_correlation.rs
check_gguf_meta.rs
check_gguf_metadata.rs
check_gguf_model_version.rs
check_gpu_logits.rs
check_idx_5475.rs
check_layer2_ffn_down.rs
check_layer4.rs
check_layer_structure.rs
check_layer_weights.rs
check_lm_head.rs
check_matmul.rs
check_newlines.rs
check_norm_weight.rs
check_norm_weights.rs
check_paris.rs
check_prefill_logits.rs
check_q4k_layout.rs
check_q8k_accuracy.rs
check_qkv_bias.rs
check_qkv_dims.rs
check_qkv_tensors.rs
check_qtype.rs
check_qtypes.rs
check_raw_bias.rs
check_raw_bias_v2.rs
check_raw_tensors.rs
check_rmsnorm_params.rs
check_space_token.rs
check_tensor_names.rs
check_tensor_order.rs
check_tensors.rs
check_theta.rs
check_token0.rs
check_token0_emb.rs
check_token_74403.rs
check_token_scores.rs
check_token_scores_v2.rs
check_tokenization.rs
check_tokenizer.rs
check_tokens.rs
check_v_weight.rs
check_v_weights.rs
check_weight_layout.rs
check_weight_scales.rs
check_weight_stats.rs
compare_all_layers.rs
compare_apr_gguf_forward.rs
compare_configs.rs
compare_cpu_gpu.rs
compare_cpu_paths.rs
compare_embed.rs
compare_embedding_index.rs
compare_forward_methods.rs
compare_forward_paths.rs
compare_hidden_before_norm.rs
compare_l0_v.rs
compare_layer0.rs
compare_layer0_full.rs
compare_layer_outputs.rs
compare_layers.rs
compare_lm_head_input.rs
compare_logits.rs
compare_matmul.rs
compare_matmul2.rs
compare_q4k_bytes.rs
compare_q_projection.rs
compare_qkv_structure.rs
compare_rope.rs
compare_v_weight.rs
compare_weights.rs
contract_pipeline_demo.rs
convert_and_bench_apr.rs
convert_apr_q4k.rs
cpu_hidden.rs
cpu_hidden_state_trace.rs
cpu_hidden_trace.rs
cuda_chat_completions.rs
cuda_debug.rs
debug_apr_divergence.rs
debug_apr_embedding.rs
debug_attention_output.rs
debug_chat_template.rs
debug_cpu_forward.rs
debug_cpu_gpu_divergence.rs
debug_early_layers.rs
debug_embed_test.rs
debug_embedding.rs
debug_embedding_qtype.rs
debug_embedding_simple.rs
debug_embeddings.rs
debug_first_q_proj.rs
debug_forward.rs
debug_forward_bias.rs
debug_gen_detailed.rs
debug_gpu_divergence.rs
debug_gpu_layer4.rs
debug_hidden_state.rs
debug_inference.rs
debug_layer0_compare.rs
debug_layer0_divergence.rs
debug_layer0_stepwise.rs
debug_layer0_trace.rs
debug_layer21_ffn.rs
debug_layer2_ffn.rs
debug_layer2_gate.rs
debug_layer_by_layer.rs
debug_layer_compare.rs
debug_lm_head.rs
debug_lm_head_direct.rs
debug_lm_head_divergence.rs
debug_lm_head_weights.rs
debug_matmul_convention.rs
debug_matvec_compare.rs
debug_normed_hidden.rs
debug_normed_hidden_compare.rs
debug_o_weight.rs
debug_o_weight_layout.rs
debug_pos1.rs
debug_ptx.rs
debug_q4_0.rs
debug_q4k_attn_output.rs
debug_q4k_controlled.rs
debug_q4k_down_weight.rs
debug_q4k_embedding_raw.rs
debug_q4k_gemv.rs
debug_q4k_gemv_layer0.rs
debug_q4k_gemv_tiled.rs
debug_q4k_real_input.rs
debug_q4k_rmsnorm_input.rs
debug_q6k_controlled.rs
debug_q6k_gemv.rs
debug_q6k_layout.rs
debug_q6k_lm_head_test.rs
debug_q6k_row.rs
debug_q6k_single_row.rs
debug_q_weight_compare.rs
debug_qkv_compare.rs
debug_qkv_matmul.rs
debug_qwen.rs
debug_rmsnorm_layer0.rs
debug_single_row.rs
debug_speculative.rs
debug_tensor_layout.rs
debug_tiled_q4k.rs
debug_v_weight.rs
debug_v_weight_layout.rs
debug_weights.rs
design_by_contract.rs
detailed_profile.rs
digit_combo_test.rs
dump_config.rs
dump_def_embedding.rs
dump_gguf_dims.rs
dump_layer0_tensors.rs
dump_q6k_ptx.rs
final_hidden_compare.rs
find_ffn_outliers.rs
fresh_compare.rs
gguf_debug.rs
gguf_loading.rs
gpu_gemm_benchmark.rs
gpu_kernel_test.rs
gpu_matvec_benchmark.rs
gpu_showcase_benchmark.rs
hidden_compare.rs
imp800_gpu_parity.rs
imp900_optimized_gpu.rs
imp_1010_full_cuda_benchmark.rs
imp_700_realworld_verification.rs
imp_701_performance_gap.rs
imp_800_kv_cache_falsification.rs
imp_801_flash_attention_falsification.rs
inference.rs
instrumented_forward.rs
layer0_attention_compare.rs
layer0_step_compare.rs
layer_by_layer_debug.rs
layer_by_layer_trace.rs
layer_compare.rs
layer_profile.rs
measure_forward_time.rs
micro_profile.rs
model_cache.rs
observability_demo.rs
par_001_check_dims.rs
par_001_check_embed.rs
par_001_check_embeddings.rs
par_001_check_lm_head.rs
par_001_check_output_norm.rs
par_001_check_q6k_superblocks.rs
par_001_compare_data.rs
par_001_debug_forward.rs
par_001_full_forward.rs
par_001_fused_vs_naive.rs
par_001_list_q6k.rs
par_001_qkv_parity.rs
par_001_qv_comparison.rs
par_001_tensor_offset.rs
par_001_test_chat.rs
par_001_test_chatml.rs
par_001_test_math.rs
par_001_trace_all_layers.rs
par_001_trace_hidden.rs
par_001_trace_layers.rs
par_001_trace_pos1.rs
par_001_trace_token.rs
par_001_trace_v.rs
par_001_transpose_test.rs
par_001_verify_colmajor.rs
par_001_verify_ffn_down.rs
par_001_verify_lm_head.rs
par_001_verify_q4k_dot.rs
par_001_verify_q4k_matvec.rs
par_001_verify_q6k_rowmajor.rs
par_001_verify_v_real.rs
par_001_weight_alignment.rs
parity_035_m4_verification.rs
parity_036_gpu_attention.rs
parity_038_async_streams.rs
parity_039_flash_attention.rs
parity_040_fp16_attention.rs
performance_parity.rs
pipeline_tui.rs
pmat_benchmark_matrix.rs
position_trace.rs
predict_after_layer1.rs
profile_7b.rs
profile_all_layers.rs
profile_all_matmuls.rs
profile_attention.rs
profile_cached_forward.rs
profile_cpu_breakdown.rs
profile_cpu_kernel.rs
profile_detailed.rs
profile_ffn.rs
profile_forward_breakdown.rs
profile_forward_detailed.rs
profile_forward_instrumented.rs
profile_forward_pass.rs
profile_full_forward.rs
profile_inference.rs
profile_layer_breakdown.rs
profile_lm_head.rs
profile_matmul_cold.rs
profile_matmul_detail.rs
profile_matmul_sizes.rs
profile_non_matmul.rs
profile_phi2_simple.rs
profile_q8k_quant.rs
profile_rayon_overhead.rs
profile_scratch_vs_cache.rs
q6k_bench.rs
quick_generate.rs
qwen_apr_demo.rs
README.md
safetensors_loading.rs
test_0_5b_raw.rs
test_1_5b_raw.rs
test_apr_q4k_forward.rs
test_apr_q4k_generate.rs
test_apr_quantized_cache.rs
test_attention_debug.rs
test_attention_phi2_dims.rs
test_chat_format.rs
test_coherence.rs
test_cpu_chat.rs
test_cuda_minimal.rs
test_ffn_q4k.rs
test_forward.rs
test_gemv_correctness.rs
test_generation.rs
test_gguf_baseline.rs
test_gpu_bias.rs
test_graphed.rs
test_inference.rs
test_lm_head_direct.rs
test_lm_head_only.rs
test_m16.rs
test_multiple_prompts.rs
test_no_bias.rs
test_ollama_match.rs
test_q4_0_parity.rs
test_q4k_cuda.rs
test_q4k_phi2_dims.rs
test_q6k_correctness.rs
test_q6k_gemv_direct.rs
test_q6k_single_row.rs
test_qkv_matmul.rs
test_qkv_with_bias.rs
test_qwen_prompt.rs
test_qwen_seq.rs
test_rope_override.rs
test_tc_attention.rs
test_tiled_vs_cpu.rs
test_tinyllama.rs
test_tinyllama_simple.rs
test_transpose.rs
test_v_matvec.rs
tinyllama_survey.rs
token_survey.rs
tokenization.rs
trace_2plus2.rs
trace_all_layers.rs
trace_attention.rs
trace_attention_scores.rs
trace_attn_weights.rs
trace_buggy_vs_ok.rs
trace_causal_attn.rs
trace_cpu_forward.rs
trace_divergence.rs
trace_final.rs
trace_final_hidden.rs
trace_forward.rs
trace_forward_detailed.rs
trace_full_layer0.rs
trace_hidden_flow.rs
trace_layer0_detailed.rs
trace_layer0_qkv.rs
trace_layer0_qkv_fixed.rs
trace_layer21.rs
trace_layer2_detail.rs
trace_lm_head.rs
trace_pos1_divergence.rs
trace_qkv_output.rs
trace_qkv_step.rs
trace_qwen_qkv.rs
trace_safetensors.rs
trace_single_layer.rs
trace_single_token_full.rs
trace_single_vs_multi.rs
trueno_ab_test.rs
trueno_dot_test.rs
validated_tensors.rs
verify_apr_q4k.rs
verify_attention_kernel.rs
verify_embed.rs
verify_embedding.rs
verify_embeddings.rs
verify_input_token.rs
verify_lm_head.rs
verify_offsets.rs
verify_q4_0_correct.rs
verify_q4_0_full.rs
verify_q4_0_matmul.rs
verify_q4k_layout.rs
verify_q8_0.rs
verify_rmsnorm.rs
verify_rope.rs
verify_rope_pos0.rs
verify_rope_type.rs
verify_tensor_offsets.rs
verify_tinyllama_multitoken.rs
verify_v_parallel_matvec.rs
weight_check.rs
wgpu_parity_test.rs
wine_lambda.rs