[package]
name = "realizar"
version = "0.8.1"
authors = ["Pragmatic AI Labs <contact@paiml.com>"]
# Target auto-discovery is switched off for every target kind, so Cargo only
# builds the lib/bin/example targets that this manifest declares explicitly.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = "build.rs"
categories = ["science", "web-programming::http-server"]
documentation = "https://docs.rs/realizar"
edition = "2021"
# Paths left out of the published package (data, docs, tests, tool/IDE state,
# coverage artifacts).
exclude = [
    "artifacts/",
    "models/",
    "docs/",
    "tests/",
    "benches/",
    "book/",
    "target/",
    ".pmat/",
    ".pmat-work/",
    ".vscode/",
    ".idea/",
    "*.profraw",
    "*.profdata",
    "proptest-regressions/",
]
# NOTE(review): identical to `repository`; consider dropping unless a separate
# project site exists.
homepage = "https://github.com/paiml/realizar"
keywords = [
    "machine-learning",
    "inference",
    "model-serving",
    "gguf",
    "transformer",
]
license = "MIT"
readme = "README.md"
repository = "https://github.com/paiml/realizar"
# Minimum Rust toolchain version Cargo will accept for this package.
rust-version = "1.89"
description = "Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors"
# docs.rs build settings: document the crate with every feature enabled and
# pass the listed extra flag to rustdoc.
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--generate-link-to-definition"]
# Release-tool settings stored under `package.metadata.release`
# (presumably consumed by cargo-release — confirm against the release workflow).
[package.metadata.release]
shared-version = true
# One replacement rule: in CHANGELOG.md, the "## [Unreleased]" heading is
# rewritten using the {{version}} and {{date}} placeholders. `search` is a
# TOML literal string, so its backslash escapes are passed through verbatim.
pre-release-replacements = [
    { file = "CHANGELOG.md", search = '## \[Unreleased\]', replace = "## [{{version}}] - {{date}}" },
]
[features]
# Feature set enabled unless a consumer opts out of default features.
default = ["server", "cli", "gpu"]
# `dep:NAME` entries enable the optional dependency NAME without exposing an
# implicit feature of the same name.
alimentar-data = ["dep:alimentar"]
apr-compression = ["dep:lz4_flex", "dep:zstd"]
aprender-serve = ["dep:aprender", "server"]
bench-http = ["dep:reqwest"]
cli = ["dep:clap", "dep:presentar-terminal", "server"]
cuda = ["dep:trueno-gpu", "dep:tracing"]
# Superset of `default` that additionally turns on `cuda` and `registry`.
full = [
    "server",
    "cli",
    "gpu",
    "cuda",
    "registry",
]
# Forwards to the `gpu` feature of the `trueno` dependency.
gpu = ["trueno/gpu"]
# Empty feature: enables nothing by itself (presumably used for cfg gating in
# the sources — not visible from this manifest).
heavy-tests = []
kv-cache = ["dep:trueno-db"]
lambda = ["dep:ureq"]
load-test-enabled = ["server"]
# Empty feature; see the note on `heavy-tests`.
minimal = []
registry = ["dep:pacha"]
# Optional dependencies switched on together by the `server` feature.
server = [
    "dep:axum",
    "dep:tokio",
    "dep:tokio-stream",
    "dep:tower",
    "dep:futures",
    "dep:async-stream",
    "dep:arc-swap",
]
# Empty feature; see the note on `heavy-tests`.
trace = []
tui = [
    "dep:ratatui",
    "dep:crossterm",
    "dep:ureq",
    "dep:clap",
]
visualization = ["dep:trueno-viz"]
# Library target, declared explicitly (this manifest sets `autolib = false`).
[lib]
name = "realizar"
path = "src/lib.rs"
# Binary targets. Each one lists the features it needs; Cargo skips a binary
# whose required features are not all enabled.

[[bin]]
name = "mnist_lambda"
path = "src/bin/mnist_lambda.rs"
required-features = ["aprender-serve", "lambda"]

# Shares its name with the package and is built from src/main.rs.
[[bin]]
name = "realizar"
path = "src/main.rs"
required-features = ["cli"]

[[bin]]
name = "realizar-monitor"
path = "src/bin/realizar_monitor/main.rs"
required-features = ["tui"]

[[bin]]
name = "wine_lambda"
path = "src/bin/wine_lambda.rs"
required-features = ["lambda"]
# Example targets `api_server` through `attention_manual_verify`. Every example is
# declared explicitly (name -> source file under examples/). An entry with
# required-features is skipped by Cargo unless those features are enabled.
# NOTE(review): `api_server` declares no required-features; confirm it builds
# when the `server` feature is disabled.
[[example]]
name = "api_server"
path = "examples/api_server.rs"
[[example]]
name = "apr_benchmark"
path = "examples/apr_benchmark.rs"
[[example]]
name = "apr_gpu_benchmark"
path = "examples/apr_gpu_benchmark.rs"
required-features = ["cuda"]
[[example]]
name = "apr_loading"
path = "examples/apr_loading.rs"
[[example]]
name = "apr_mmap_loading"
path = "examples/apr_mmap_loading.rs"
[[example]]
name = "attention_manual_verify"
path = "examples/attention_manual_verify.rs"
# Example targets named `bench_*` plus `benchmark_cpu`. Entries that carry
# required-features = ["cuda"] are skipped by Cargo unless `cuda` is enabled;
# the rest have no feature requirement.
[[example]]
name = "bench_apr_gpu"
path = "examples/bench_apr_gpu.rs"
required-features = ["cuda"]
[[example]]
name = "bench_apr_vs_gguf"
path = "examples/bench_apr_vs_gguf.rs"
required-features = ["cuda"]
[[example]]
name = "bench_barrier_overhead"
path = "examples/bench_barrier_overhead.rs"
[[example]]
name = "bench_batched_forward"
path = "examples/bench_batched_forward.rs"
required-features = ["cuda"]
[[example]]
name = "bench_batched_gemv"
path = "examples/bench_batched_gemv.rs"
required-features = ["cuda"]
[[example]]
name = "bench_chunk_sizes"
path = "examples/bench_chunk_sizes.rs"
[[example]]
name = "bench_chunked_matmul"
path = "examples/bench_chunked_matmul.rs"
[[example]]
name = "bench_comparison"
path = "examples/bench_comparison.rs"
[[example]]
name = "bench_continuous_batching"
path = "examples/bench_continuous_batching.rs"
required-features = ["cuda"]
[[example]]
name = "bench_cpu_util"
path = "examples/bench_cpu_util.rs"
[[example]]
name = "bench_flash_decoding"
path = "examples/bench_flash_decoding.rs"
required-features = ["cuda"]
[[example]]
name = "bench_forward"
path = "examples/bench_forward.rs"
[[example]]
name = "bench_gemv"
path = "examples/bench_gemv.rs"
required-features = ["cuda"]
[[example]]
name = "bench_kernel_variants"
path = "examples/bench_kernel_variants.rs"
[[example]]
name = "bench_manual_threads"
path = "examples/bench_manual_threads.rs"
[[example]]
name = "bench_matmul_breakdown"
path = "examples/bench_matmul_breakdown.rs"
[[example]]
name = "bench_matmul_only"
path = "examples/bench_matmul_only.rs"
[[example]]
name = "bench_multisequence_graph"
path = "examples/bench_multisequence_graph.rs"
required-features = ["cuda"]
[[example]]
name = "bench_q4k_detect"
path = "examples/bench_q4k_detect.rs"
[[example]]
name = "bench_q4k_parallel"
path = "examples/bench_q4k_parallel.rs"
[[example]]
name = "bench_q4k_simd"
path = "examples/bench_q4k_simd.rs"
[[example]]
name = "bench_q8k_speedup"
path = "examples/bench_q8k_speedup.rs"
[[example]]
name = "bench_qwen"
path = "examples/bench_qwen.rs"
[[example]]
name = "bench_rayon_overhead"
path = "examples/bench_rayon_overhead.rs"
[[example]]
name = "bench_realistic"
path = "examples/bench_realistic.rs"
[[example]]
name = "bench_scaling"
path = "examples/bench_scaling.rs"
[[example]]
name = "bench_scratch"
path = "examples/bench_scratch.rs"
[[example]]
name = "bench_seq_vs_par"
path = "examples/bench_seq_vs_par.rs"
[[example]]
name = "bench_simd_dot"
path = "examples/bench_simd_dot.rs"
[[example]]
name = "bench_speculative"
path = "examples/bench_speculative.rs"
required-features = ["cuda"]
[[example]]
name = "bench_tiled_q4k"
path = "examples/bench_tiled_q4k.rs"
required-features = ["cuda"]
[[example]]
name = "bench_toks"
path = "examples/bench_toks.rs"
[[example]]
name = "bench_v2_kernel"
path = "examples/bench_v2_kernel.rs"
[[example]]
name = "bench_vnni_vs_avx2"
path = "examples/bench_vnni_vs_avx2.rs"
[[example]]
name = "benchmark_cpu"
path = "examples/benchmark_cpu.rs"
# Example targets `brick_divergence_trace` (built only with the `cuda` feature)
# and `chat_template` (no feature requirement).
[[example]]
name = "brick_divergence_trace"
path = "examples/brick_divergence_trace.rs"
required-features = ["cuda"]
[[example]]
name = "chat_template"
path = "examples/chat_template.rs"
# Example targets named `check_*` (first part: `check_apr_tensors` through
# `check_newlines`). Only `check_gpu_logits` requires the `cuda` feature.
[[example]]
name = "check_apr_tensors"
path = "examples/check_apr_tensors.rs"
[[example]]
name = "check_arch_detection"
path = "examples/check_arch_detection.rs"
[[example]]
name = "check_bias"
path = "examples/check_bias.rs"
[[example]]
name = "check_bias_format"
path = "examples/check_bias_format.rs"
[[example]]
name = "check_bias_raw"
path = "examples/check_bias_raw.rs"
[[example]]
name = "check_biases"
path = "examples/check_biases.rs"
[[example]]
name = "check_dims"
path = "examples/check_dims.rs"
[[example]]
name = "check_embed"
path = "examples/check_embed.rs"
[[example]]
name = "check_embed_layout"
path = "examples/check_embed_layout.rs"
[[example]]
name = "check_embedding"
path = "examples/check_embedding.rs"
[[example]]
name = "check_embeddings"
path = "examples/check_embeddings.rs"
[[example]]
name = "check_ffn_down_col_5475"
path = "examples/check_ffn_down_col_5475.rs"
[[example]]
name = "check_ffn_down_types"
path = "examples/check_ffn_down_types.rs"
[[example]]
name = "check_ffn_down_weight"
path = "examples/check_ffn_down_weight.rs"
[[example]]
name = "check_ffn_hidden"
path = "examples/check_ffn_hidden.rs"
[[example]]
name = "check_final_hidden"
path = "examples/check_final_hidden.rs"
[[example]]
name = "check_forward"
path = "examples/check_forward.rs"
[[example]]
name = "check_gate_up_correlation"
path = "examples/check_gate_up_correlation.rs"
[[example]]
name = "check_gguf_meta"
path = "examples/check_gguf_meta.rs"
[[example]]
name = "check_gguf_metadata"
path = "examples/check_gguf_metadata.rs"
[[example]]
name = "check_gguf_model_version"
path = "examples/check_gguf_model_version.rs"
[[example]]
name = "check_gpu_logits"
path = "examples/check_gpu_logits.rs"
required-features = ["cuda"]
[[example]]
name = "check_idx_5475"
path = "examples/check_idx_5475.rs"
[[example]]
name = "check_layer2_ffn_down"
path = "examples/check_layer2_ffn_down.rs"
[[example]]
name = "check_layer4"
path = "examples/check_layer4.rs"
[[example]]
name = "check_layer_structure"
path = "examples/check_layer_structure.rs"
[[example]]
name = "check_layer_weights"
path = "examples/check_layer_weights.rs"
[[example]]
name = "check_lm_head"
path = "examples/check_lm_head.rs"
[[example]]
name = "check_matmul"
path = "examples/check_matmul.rs"
[[example]]
name = "check_newlines"
path = "examples/check_newlines.rs"
# Example targets named `check_*` (second part: `check_norm_weight` through
# `check_weight_stats`). Only `check_token_74403` requires the `cuda` feature.
[[example]]
name = "check_norm_weight"
path = "examples/check_norm_weight.rs"
[[example]]
name = "check_norm_weights"
path = "examples/check_norm_weights.rs"
[[example]]
name = "check_paris"
path = "examples/check_paris.rs"
[[example]]
name = "check_prefill_logits"
path = "examples/check_prefill_logits.rs"
[[example]]
name = "check_q4k_layout"
path = "examples/check_q4k_layout.rs"
[[example]]
name = "check_q8k_accuracy"
path = "examples/check_q8k_accuracy.rs"
[[example]]
name = "check_qkv_bias"
path = "examples/check_qkv_bias.rs"
[[example]]
name = "check_qkv_dims"
path = "examples/check_qkv_dims.rs"
[[example]]
name = "check_qkv_tensors"
path = "examples/check_qkv_tensors.rs"
[[example]]
name = "check_qtype"
path = "examples/check_qtype.rs"
[[example]]
name = "check_qtypes"
path = "examples/check_qtypes.rs"
[[example]]
name = "check_raw_bias"
path = "examples/check_raw_bias.rs"
[[example]]
name = "check_raw_bias_v2"
path = "examples/check_raw_bias_v2.rs"
[[example]]
name = "check_raw_tensors"
path = "examples/check_raw_tensors.rs"
[[example]]
name = "check_rmsnorm_params"
path = "examples/check_rmsnorm_params.rs"
[[example]]
name = "check_space_token"
path = "examples/check_space_token.rs"
[[example]]
name = "check_tensor_names"
path = "examples/check_tensor_names.rs"
[[example]]
name = "check_tensor_order"
path = "examples/check_tensor_order.rs"
[[example]]
name = "check_tensors"
path = "examples/check_tensors.rs"
[[example]]
name = "check_theta"
path = "examples/check_theta.rs"
[[example]]
name = "check_token0"
path = "examples/check_token0.rs"
[[example]]
name = "check_token0_emb"
path = "examples/check_token0_emb.rs"
[[example]]
name = "check_token_74403"
path = "examples/check_token_74403.rs"
required-features = ["cuda"]
[[example]]
name = "check_token_scores"
path = "examples/check_token_scores.rs"
[[example]]
name = "check_token_scores_v2"
path = "examples/check_token_scores_v2.rs"
[[example]]
name = "check_tokenization"
path = "examples/check_tokenization.rs"
[[example]]
name = "check_tokenizer"
path = "examples/check_tokenizer.rs"
[[example]]
name = "check_tokens"
path = "examples/check_tokens.rs"
[[example]]
name = "check_v_weight"
path = "examples/check_v_weight.rs"
[[example]]
name = "check_v_weights"
path = "examples/check_v_weights.rs"
[[example]]
name = "check_weight_layout"
path = "examples/check_weight_layout.rs"
[[example]]
name = "check_weight_scales"
path = "examples/check_weight_scales.rs"
[[example]]
name = "check_weight_stats"
path = "examples/check_weight_stats.rs"
# Example targets named `compare_*`. Entries that carry
# required-features = ["cuda"] are skipped by Cargo unless `cuda` is enabled.
[[example]]
name = "compare_all_layers"
path = "examples/compare_all_layers.rs"
[[example]]
name = "compare_apr_gguf_forward"
path = "examples/compare_apr_gguf_forward.rs"
[[example]]
name = "compare_configs"
path = "examples/compare_configs.rs"
[[example]]
name = "compare_cpu_gpu"
path = "examples/compare_cpu_gpu.rs"
required-features = ["cuda"]
[[example]]
name = "compare_cpu_paths"
path = "examples/compare_cpu_paths.rs"
[[example]]
name = "compare_embed"
path = "examples/compare_embed.rs"
[[example]]
name = "compare_embedding_index"
path = "examples/compare_embedding_index.rs"
[[example]]
name = "compare_forward_methods"
path = "examples/compare_forward_methods.rs"
[[example]]
name = "compare_forward_paths"
path = "examples/compare_forward_paths.rs"
[[example]]
name = "compare_hidden_before_norm"
path = "examples/compare_hidden_before_norm.rs"
required-features = ["cuda"]
[[example]]
name = "compare_l0_v"
path = "examples/compare_l0_v.rs"
[[example]]
name = "compare_layer0"
path = "examples/compare_layer0.rs"
[[example]]
name = "compare_layer0_full"
path = "examples/compare_layer0_full.rs"
required-features = ["cuda"]
[[example]]
name = "compare_layer_outputs"
path = "examples/compare_layer_outputs.rs"
[[example]]
name = "compare_layers"
path = "examples/compare_layers.rs"
required-features = ["cuda"]
[[example]]
name = "compare_lm_head_input"
path = "examples/compare_lm_head_input.rs"
required-features = ["cuda"]
[[example]]
name = "compare_logits"
path = "examples/compare_logits.rs"
[[example]]
name = "compare_matmul"
path = "examples/compare_matmul.rs"
[[example]]
name = "compare_matmul2"
path = "examples/compare_matmul2.rs"
[[example]]
name = "compare_q4k_bytes"
path = "examples/compare_q4k_bytes.rs"
[[example]]
name = "compare_q_projection"
path = "examples/compare_q_projection.rs"
required-features = ["cuda"]
[[example]]
name = "compare_qkv_structure"
path = "examples/compare_qkv_structure.rs"
[[example]]
name = "compare_rope"
path = "examples/compare_rope.rs"
[[example]]
name = "compare_v_weight"
path = "examples/compare_v_weight.rs"
[[example]]
name = "compare_weights"
path = "examples/compare_weights.rs"
required-features = ["cuda"]
# Example targets named `convert_*`, `cpu_*` and `cuda_*`. Both `cuda_*`
# entries require the `cuda` feature; the others have no feature requirement.
[[example]]
name = "convert_and_bench_apr"
path = "examples/convert_and_bench_apr.rs"
[[example]]
name = "convert_apr_q4k"
path = "examples/convert_apr_q4k.rs"
[[example]]
name = "cpu_hidden"
path = "examples/cpu_hidden.rs"
[[example]]
name = "cpu_hidden_state_trace"
path = "examples/cpu_hidden_state_trace.rs"
[[example]]
name = "cpu_hidden_trace"
path = "examples/cpu_hidden_trace.rs"
[[example]]
name = "cuda_chat_completions"
path = "examples/cuda_chat_completions.rs"
required-features = ["cuda"]
[[example]]
name = "cuda_debug"
path = "examples/cuda_debug.rs"
required-features = ["cuda"]
# Example targets named `debug_*` (first part: `debug_apr_divergence` through
# `debug_matvec_compare`). Entries that carry required-features = ["cuda"] are
# skipped by Cargo unless `cuda` is enabled.
[[example]]
name = "debug_apr_divergence"
path = "examples/debug_apr_divergence.rs"
[[example]]
name = "debug_apr_embedding"
path = "examples/debug_apr_embedding.rs"
[[example]]
name = "debug_attention_output"
path = "examples/debug_attention_output.rs"
[[example]]
name = "debug_chat_template"
path = "examples/debug_chat_template.rs"
[[example]]
name = "debug_cpu_forward"
path = "examples/debug_cpu_forward.rs"
[[example]]
name = "debug_cpu_gpu_divergence"
path = "examples/debug_cpu_gpu_divergence.rs"
required-features = ["cuda"]
[[example]]
name = "debug_early_layers"
path = "examples/debug_early_layers.rs"
[[example]]
name = "debug_embed_test"
path = "examples/debug_embed_test.rs"
[[example]]
name = "debug_embedding"
path = "examples/debug_embedding.rs"
[[example]]
name = "debug_embedding_qtype"
path = "examples/debug_embedding_qtype.rs"
[[example]]
name = "debug_embedding_simple"
path = "examples/debug_embedding_simple.rs"
[[example]]
name = "debug_embeddings"
path = "examples/debug_embeddings.rs"
[[example]]
name = "debug_first_q_proj"
path = "examples/debug_first_q_proj.rs"
[[example]]
name = "debug_forward"
path = "examples/debug_forward.rs"
[[example]]
name = "debug_forward_bias"
path = "examples/debug_forward_bias.rs"
[[example]]
name = "debug_gen_detailed"
path = "examples/debug_gen_detailed.rs"
[[example]]
name = "debug_gpu_divergence"
path = "examples/debug_gpu_divergence.rs"
required-features = ["cuda"]
# NOTE(review): no required-features here, unlike `debug_gpu_divergence`;
# confirm this example builds without the `cuda` feature.
[[example]]
name = "debug_gpu_layer4"
path = "examples/debug_gpu_layer4.rs"
[[example]]
name = "debug_hidden_state"
path = "examples/debug_hidden_state.rs"
required-features = ["cuda"]
[[example]]
name = "debug_inference"
path = "examples/debug_inference.rs"
[[example]]
name = "debug_layer0_compare"
path = "examples/debug_layer0_compare.rs"
required-features = ["cuda"]
[[example]]
name = "debug_layer0_divergence"
path = "examples/debug_layer0_divergence.rs"
required-features = ["cuda"]
[[example]]
name = "debug_layer0_stepwise"
path = "examples/debug_layer0_stepwise.rs"
required-features = ["cuda"]
[[example]]
name = "debug_layer0_trace"
path = "examples/debug_layer0_trace.rs"
[[example]]
name = "debug_layer21_ffn"
path = "examples/debug_layer21_ffn.rs"
[[example]]
name = "debug_layer2_ffn"
path = "examples/debug_layer2_ffn.rs"
[[example]]
name = "debug_layer2_gate"
path = "examples/debug_layer2_gate.rs"
[[example]]
name = "debug_layer_by_layer"
path = "examples/debug_layer_by_layer.rs"
required-features = ["cuda"]
[[example]]
name = "debug_layer_compare"
path = "examples/debug_layer_compare.rs"
[[example]]
name = "debug_lm_head"
path = "examples/debug_lm_head.rs"
required-features = ["cuda"]
[[example]]
name = "debug_lm_head_direct"
path = "examples/debug_lm_head_direct.rs"
required-features = ["cuda"]
[[example]]
name = "debug_lm_head_divergence"
path = "examples/debug_lm_head_divergence.rs"
required-features = ["cuda"]
[[example]]
name = "debug_lm_head_weights"
path = "examples/debug_lm_head_weights.rs"
[[example]]
name = "debug_matmul_convention"
path = "examples/debug_matmul_convention.rs"
[[example]]
name = "debug_matvec_compare"
path = "examples/debug_matvec_compare.rs"
# Example targets named `debug_*` (second part: `debug_normed_hidden` through
# `debug_weights`). Entries that carry required-features = ["cuda"] are
# skipped by Cargo unless `cuda` is enabled.
[[example]]
name = "debug_normed_hidden"
path = "examples/debug_normed_hidden.rs"
required-features = ["cuda"]
[[example]]
name = "debug_normed_hidden_compare"
path = "examples/debug_normed_hidden_compare.rs"
required-features = ["cuda"]
[[example]]
name = "debug_o_weight"
path = "examples/debug_o_weight.rs"
[[example]]
name = "debug_o_weight_layout"
path = "examples/debug_o_weight_layout.rs"
[[example]]
name = "debug_pos1"
path = "examples/debug_pos1.rs"
required-features = ["cuda"]
[[example]]
name = "debug_ptx"
path = "examples/debug_ptx.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q4_0"
path = "examples/debug_q4_0.rs"
[[example]]
name = "debug_q4k_attn_output"
path = "examples/debug_q4k_attn_output.rs"
[[example]]
name = "debug_q4k_controlled"
path = "examples/debug_q4k_controlled.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q4k_down_weight"
path = "examples/debug_q4k_down_weight.rs"
[[example]]
name = "debug_q4k_embedding_raw"
path = "examples/debug_q4k_embedding_raw.rs"
[[example]]
name = "debug_q4k_gemv"
path = "examples/debug_q4k_gemv.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q4k_gemv_layer0"
path = "examples/debug_q4k_gemv_layer0.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q4k_gemv_tiled"
path = "examples/debug_q4k_gemv_tiled.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q4k_real_input"
path = "examples/debug_q4k_real_input.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q4k_rmsnorm_input"
path = "examples/debug_q4k_rmsnorm_input.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q6k_controlled"
path = "examples/debug_q6k_controlled.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q6k_gemv"
path = "examples/debug_q6k_gemv.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q6k_layout"
path = "examples/debug_q6k_layout.rs"
[[example]]
name = "debug_q6k_lm_head_test"
path = "examples/debug_q6k_lm_head_test.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q6k_row"
path = "examples/debug_q6k_row.rs"
required-features = ["cuda"]
[[example]]
name = "debug_q6k_single_row"
path = "examples/debug_q6k_single_row.rs"
[[example]]
name = "debug_q_weight_compare"
path = "examples/debug_q_weight_compare.rs"
[[example]]
name = "debug_qkv_compare"
path = "examples/debug_qkv_compare.rs"
[[example]]
name = "debug_qkv_matmul"
path = "examples/debug_qkv_matmul.rs"
[[example]]
name = "debug_qwen"
path = "examples/debug_qwen.rs"
[[example]]
name = "debug_rmsnorm_layer0"
path = "examples/debug_rmsnorm_layer0.rs"
required-features = ["cuda"]
[[example]]
name = "debug_single_row"
path = "examples/debug_single_row.rs"
required-features = ["cuda"]
[[example]]
name = "debug_speculative"
path = "examples/debug_speculative.rs"
required-features = ["cuda"]
[[example]]
name = "debug_tensor_layout"
path = "examples/debug_tensor_layout.rs"
[[example]]
name = "debug_tiled_q4k"
path = "examples/debug_tiled_q4k.rs"
required-features = ["cuda"]
[[example]]
name = "debug_v_weight"
path = "examples/debug_v_weight.rs"
[[example]]
name = "debug_v_weight_layout"
path = "examples/debug_v_weight_layout.rs"
[[example]]
name = "debug_weights"
path = "examples/debug_weights.rs"
# Example targets `design_by_contract`, `detailed_profile`, `digit_combo_test`
# and the `dump_*` family. Only `dump_q6k_ptx` requires the `cuda` feature.
[[example]]
name = "design_by_contract"
path = "examples/design_by_contract.rs"
[[example]]
name = "detailed_profile"
path = "examples/detailed_profile.rs"
[[example]]
name = "digit_combo_test"
path = "examples/digit_combo_test.rs"
[[example]]
name = "dump_config"
path = "examples/dump_config.rs"
[[example]]
name = "dump_def_embedding"
path = "examples/dump_def_embedding.rs"
[[example]]
name = "dump_gguf_dims"
path = "examples/dump_gguf_dims.rs"
[[example]]
name = "dump_layer0_tensors"
path = "examples/dump_layer0_tensors.rs"
[[example]]
name = "dump_q6k_ptx"
path = "examples/dump_q6k_ptx.rs"
required-features = ["cuda"]
# Example targets `final_hidden_compare` through `hidden_compare`. Entries that
# carry required-features = ["cuda"] are skipped unless `cuda` is enabled.
[[example]]
name = "final_hidden_compare"
path = "examples/final_hidden_compare.rs"
required-features = ["cuda"]
[[example]]
name = "find_ffn_outliers"
path = "examples/find_ffn_outliers.rs"
[[example]]
name = "fresh_compare"
path = "examples/fresh_compare.rs"
required-features = ["cuda"]
[[example]]
name = "gguf_debug"
path = "examples/gguf_debug.rs"
[[example]]
name = "gguf_loading"
path = "examples/gguf_loading.rs"
# NOTE(review): the next two `gpu_*` examples declare no required-features,
# while `gpu_showcase_benchmark` requires `cuda`; confirm they build without
# the `gpu`/`cuda` features.
[[example]]
name = "gpu_gemm_benchmark"
path = "examples/gpu_gemm_benchmark.rs"
[[example]]
name = "gpu_matvec_benchmark"
path = "examples/gpu_matvec_benchmark.rs"
[[example]]
name = "gpu_showcase_benchmark"
path = "examples/gpu_showcase_benchmark.rs"
required-features = ["cuda"]
[[example]]
name = "hidden_compare"
path = "examples/hidden_compare.rs"
required-features = ["cuda"]
# Example targets named `imp*`. Entries that carry required-features = ["cuda"]
# are skipped by Cargo unless `cuda` is enabled.
[[example]]
name = "imp800_gpu_parity"
path = "examples/imp800_gpu_parity.rs"
required-features = ["cuda"]
[[example]]
name = "imp900_optimized_gpu"
path = "examples/imp900_optimized_gpu.rs"
required-features = ["cuda"]
[[example]]
name = "imp_1010_full_cuda_benchmark"
path = "examples/imp_1010_full_cuda_benchmark.rs"
required-features = ["cuda"]
[[example]]
name = "imp_700_realworld_verification"
path = "examples/imp_700_realworld_verification.rs"
[[example]]
name = "imp_701_performance_gap"
path = "examples/imp_701_performance_gap.rs"
[[example]]
name = "imp_800_kv_cache_falsification"
path = "examples/imp_800_kv_cache_falsification.rs"
[[example]]
name = "imp_801_flash_attention_falsification"
path = "examples/imp_801_flash_attention_falsification.rs"
required-features = ["cuda"]
# Example targets `inference` through `observability_demo`. Entries that carry
# required-features = ["cuda"] are skipped unless `cuda` is enabled.
[[example]]
name = "inference"
path = "examples/inference.rs"
[[example]]
name = "instrumented_forward"
path = "examples/instrumented_forward.rs"
[[example]]
name = "layer0_attention_compare"
path = "examples/layer0_attention_compare.rs"
required-features = ["cuda"]
[[example]]
name = "layer0_step_compare"
path = "examples/layer0_step_compare.rs"
required-features = ["cuda"]
[[example]]
name = "layer_by_layer_debug"
path = "examples/layer_by_layer_debug.rs"
[[example]]
name = "layer_by_layer_trace"
path = "examples/layer_by_layer_trace.rs"
required-features = ["cuda"]
[[example]]
name = "layer_compare"
path = "examples/layer_compare.rs"
[[example]]
name = "layer_profile"
path = "examples/layer_profile.rs"
[[example]]
name = "measure_forward_time"
path = "examples/measure_forward_time.rs"
[[example]]
name = "micro_profile"
path = "examples/micro_profile.rs"
[[example]]
name = "model_cache"
path = "examples/model_cache.rs"
[[example]]
name = "observability_demo"
path = "examples/observability_demo.rs"
# Example targets named `par_001_*`. None of them declares required-features,
# so Cargo builds them under any feature selection.
[[example]]
name = "par_001_check_dims"
path = "examples/par_001_check_dims.rs"
[[example]]
name = "par_001_check_embed"
path = "examples/par_001_check_embed.rs"
[[example]]
name = "par_001_check_embeddings"
path = "examples/par_001_check_embeddings.rs"
[[example]]
name = "par_001_check_lm_head"
path = "examples/par_001_check_lm_head.rs"
[[example]]
name = "par_001_check_output_norm"
path = "examples/par_001_check_output_norm.rs"
[[example]]
name = "par_001_check_q6k_superblocks"
path = "examples/par_001_check_q6k_superblocks.rs"
[[example]]
name = "par_001_compare_data"
path = "examples/par_001_compare_data.rs"
[[example]]
name = "par_001_debug_forward"
path = "examples/par_001_debug_forward.rs"
[[example]]
name = "par_001_full_forward"
path = "examples/par_001_full_forward.rs"
[[example]]
name = "par_001_fused_vs_naive"
path = "examples/par_001_fused_vs_naive.rs"
[[example]]
name = "par_001_list_q6k"
path = "examples/par_001_list_q6k.rs"
[[example]]
name = "par_001_qkv_parity"
path = "examples/par_001_qkv_parity.rs"
[[example]]
name = "par_001_qv_comparison"
path = "examples/par_001_qv_comparison.rs"
[[example]]
name = "par_001_tensor_offset"
path = "examples/par_001_tensor_offset.rs"
[[example]]
name = "par_001_test_chat"
path = "examples/par_001_test_chat.rs"
[[example]]
name = "par_001_test_chatml"
path = "examples/par_001_test_chatml.rs"
[[example]]
name = "par_001_test_math"
path = "examples/par_001_test_math.rs"
[[example]]
name = "par_001_trace_all_layers"
path = "examples/par_001_trace_all_layers.rs"
[[example]]
name = "par_001_trace_hidden"
path = "examples/par_001_trace_hidden.rs"
[[example]]
name = "par_001_trace_layers"
path = "examples/par_001_trace_layers.rs"
[[example]]
name = "par_001_trace_pos1"
path = "examples/par_001_trace_pos1.rs"
[[example]]
name = "par_001_trace_token"
path = "examples/par_001_trace_token.rs"
[[example]]
name = "par_001_trace_v"
path = "examples/par_001_trace_v.rs"
[[example]]
name = "par_001_transpose_test"
path = "examples/par_001_transpose_test.rs"
[[example]]
name = "par_001_verify_colmajor"
path = "examples/par_001_verify_colmajor.rs"
[[example]]
name = "par_001_verify_ffn_down"
path = "examples/par_001_verify_ffn_down.rs"
[[example]]
name = "par_001_verify_lm_head"
path = "examples/par_001_verify_lm_head.rs"
[[example]]
name = "par_001_verify_q4k_dot"
path = "examples/par_001_verify_q4k_dot.rs"
[[example]]
name = "par_001_verify_q4k_matvec"
path = "examples/par_001_verify_q4k_matvec.rs"
[[example]]
name = "par_001_verify_q6k_rowmajor"
path = "examples/par_001_verify_q6k_rowmajor.rs"
[[example]]
name = "par_001_verify_v_real"
path = "examples/par_001_verify_v_real.rs"
[[example]]
name = "par_001_weight_alignment"
path = "examples/par_001_weight_alignment.rs"
# Example targets named `parity_*`. All except `parity_035_m4_verification`
# require the `cuda` feature.
[[example]]
name = "parity_035_m4_verification"
path = "examples/parity_035_m4_verification.rs"
[[example]]
name = "parity_036_gpu_attention"
path = "examples/parity_036_gpu_attention.rs"
required-features = ["cuda"]
[[example]]
name = "parity_038_async_streams"
path = "examples/parity_038_async_streams.rs"
required-features = ["cuda"]
[[example]]
name = "parity_039_flash_attention"
path = "examples/parity_039_flash_attention.rs"
required-features = ["cuda"]
[[example]]
name = "parity_040_fp16_attention"
path = "examples/parity_040_fp16_attention.rs"
required-features = ["cuda"]
# Example targets `performance_parity` through `predict_after_layer1`. Only
# `pmat_benchmark_matrix` requires the `cuda` feature.
[[example]]
name = "performance_parity"
path = "examples/performance_parity.rs"
# NOTE(review): no required-features despite the name; confirm this example
# does not depend on the optional `tui` feature.
[[example]]
name = "pipeline_tui"
path = "examples/pipeline_tui.rs"
[[example]]
name = "pmat_benchmark_matrix"
path = "examples/pmat_benchmark_matrix.rs"
required-features = ["cuda"]
[[example]]
name = "position_trace"
path = "examples/position_trace.rs"
[[example]]
name = "predict_after_layer1"
path = "examples/predict_after_layer1.rs"
# Example targets named `profile_*`. Only `profile_7b` requires the `cuda`
# feature; the rest have no feature requirement.
[[example]]
name = "profile_7b"
path = "examples/profile_7b.rs"
required-features = ["cuda"]
[[example]]
name = "profile_all_layers"
path = "examples/profile_all_layers.rs"
[[example]]
name = "profile_all_matmuls"
path = "examples/profile_all_matmuls.rs"
[[example]]
name = "profile_attention"
path = "examples/profile_attention.rs"
[[example]]
name = "profile_cached_forward"
path = "examples/profile_cached_forward.rs"
[[example]]
name = "profile_cpu_breakdown"
path = "examples/profile_cpu_breakdown.rs"
[[example]]
name = "profile_cpu_kernel"
path = "examples/profile_cpu_kernel.rs"
[[example]]
name = "profile_detailed"
path = "examples/profile_detailed.rs"
[[example]]
name = "profile_ffn"
path = "examples/profile_ffn.rs"
[[example]]
name = "profile_forward_breakdown"
path = "examples/profile_forward_breakdown.rs"
[[example]]
name = "profile_forward_detailed"
path = "examples/profile_forward_detailed.rs"
[[example]]
name = "profile_forward_instrumented"
path = "examples/profile_forward_instrumented.rs"
[[example]]
name = "profile_forward_pass"
path = "examples/profile_forward_pass.rs"
[[example]]
name = "profile_full_forward"
path = "examples/profile_full_forward.rs"
[[example]]
name = "profile_inference"
path = "examples/profile_inference.rs"
[[example]]
name = "profile_layer_breakdown"
path = "examples/profile_layer_breakdown.rs"
[[example]]
name = "profile_lm_head"
path = "examples/profile_lm_head.rs"
[[example]]
name = "profile_matmul_cold"
path = "examples/profile_matmul_cold.rs"
[[example]]
name = "profile_matmul_detail"
path = "examples/profile_matmul_detail.rs"
[[example]]
name = "profile_matmul_sizes"
path = "examples/profile_matmul_sizes.rs"
[[example]]
name = "profile_non_matmul"
path = "examples/profile_non_matmul.rs"
[[example]]
name = "profile_phi2_simple"
path = "examples/profile_phi2_simple.rs"
[[example]]
name = "profile_q8k_quant"
path = "examples/profile_q8k_quant.rs"
[[example]]
name = "profile_rayon_overhead"
path = "examples/profile_rayon_overhead.rs"
[[example]]
name = "profile_scratch_vs_cache"
path = "examples/profile_scratch_vs_cache.rs"
# Example targets `q6k_bench`, `quick_generate`, `qwen_apr_demo` and
# `safetensors_loading`; none declares required-features.
[[example]]
name = "q6k_bench"
path = "examples/q6k_bench.rs"
[[example]]
name = "quick_generate"
path = "examples/quick_generate.rs"
[[example]]
name = "qwen_apr_demo"
path = "examples/qwen_apr_demo.rs"
[[example]]
name = "safetensors_loading"
path = "examples/safetensors_loading.rs"
# Example targets named `test_*` (first part: `test_0_5b_raw` through
# `test_lm_head_direct`). These are example programs, not `[[test]]` targets.
# Entries that carry required-features = ["cuda"] are skipped unless `cuda`
# is enabled.
[[example]]
name = "test_0_5b_raw"
path = "examples/test_0_5b_raw.rs"
[[example]]
name = "test_1_5b_raw"
path = "examples/test_1_5b_raw.rs"
[[example]]
name = "test_apr_q4k_forward"
path = "examples/test_apr_q4k_forward.rs"
[[example]]
name = "test_apr_q4k_generate"
path = "examples/test_apr_q4k_generate.rs"
[[example]]
name = "test_apr_quantized_cache"
path = "examples/test_apr_quantized_cache.rs"
required-features = ["cuda"]
[[example]]
name = "test_attention_debug"
path = "examples/test_attention_debug.rs"
required-features = ["cuda"]
[[example]]
name = "test_attention_phi2_dims"
path = "examples/test_attention_phi2_dims.rs"
required-features = ["cuda"]
[[example]]
name = "test_chat_format"
path = "examples/test_chat_format.rs"
[[example]]
name = "test_coherence"
path = "examples/test_coherence.rs"
[[example]]
name = "test_cpu_chat"
path = "examples/test_cpu_chat.rs"
[[example]]
name = "test_cuda_minimal"
path = "examples/test_cuda_minimal.rs"
required-features = ["cuda"]
[[example]]
name = "test_ffn_q4k"
path = "examples/test_ffn_q4k.rs"
[[example]]
name = "test_forward"
path = "examples/test_forward.rs"
[[example]]
name = "test_gemv_correctness"
path = "examples/test_gemv_correctness.rs"
required-features = ["cuda"]
[[example]]
name = "test_generation"
path = "examples/test_generation.rs"
required-features = ["cuda"]
[[example]]
name = "test_gguf_baseline"
path = "examples/test_gguf_baseline.rs"
[[example]]
name = "test_gpu_bias"
path = "examples/test_gpu_bias.rs"
required-features = ["cuda"]
[[example]]
name = "test_graphed"
path = "examples/test_graphed.rs"
required-features = ["cuda"]
[[example]]
name = "test_inference"
path = "examples/test_inference.rs"
[[example]]
name = "test_lm_head_direct"
path = "examples/test_lm_head_direct.rs"
[[example]]
name = "test_lm_head_only"
path = "examples/test_lm_head_only.rs"
required-features = ["cuda"]
[[example]]
name = "test_m16"
path = "examples/test_m16.rs"
required-features = ["cuda"]
[[example]]
name = "test_multiple_prompts"
path = "examples/test_multiple_prompts.rs"
[[example]]
name = "test_no_bias"
path = "examples/test_no_bias.rs"
[[example]]
name = "test_ollama_match"
path = "examples/test_ollama_match.rs"
[[example]]
name = "test_q4_0_parity"
path = "examples/test_q4_0_parity.rs"
[[example]]
name = "test_q4k_cuda"
path = "examples/test_q4k_cuda.rs"
required-features = ["cuda"]
[[example]]
name = "test_q4k_phi2_dims"
path = "examples/test_q4k_phi2_dims.rs"
required-features = ["cuda"]
[[example]]
name = "test_q6k_correctness"
path = "examples/test_q6k_correctness.rs"
required-features = ["cuda"]
[[example]]
name = "test_q6k_gemv_direct"
path = "examples/test_q6k_gemv_direct.rs"
required-features = ["cuda"]
[[example]]
name = "test_q6k_single_row"
path = "examples/test_q6k_single_row.rs"
required-features = ["cuda"]
# `test_*` examples (third part). Targets with
# `required-features = ["cuda"]` are only built when the `cuda` feature is on.

[[example]]
name = "test_qkv_matmul"
path = "examples/test_qkv_matmul.rs"

[[example]]
name = "test_qkv_with_bias"
path = "examples/test_qkv_with_bias.rs"

[[example]]
name = "test_qwen_prompt"
path = "examples/test_qwen_prompt.rs"

[[example]]
name = "test_qwen_seq"
path = "examples/test_qwen_seq.rs"

[[example]]
name = "test_rope_override"
path = "examples/test_rope_override.rs"

[[example]]
name = "test_tc_attention"
path = "examples/test_tc_attention.rs"
required-features = ["cuda"]

[[example]]
name = "test_tiled_vs_cpu"
path = "examples/test_tiled_vs_cpu.rs"
required-features = ["cuda"]

[[example]]
name = "test_tinyllama"
path = "examples/test_tinyllama.rs"

[[example]]
name = "test_tinyllama_simple"
path = "examples/test_tinyllama_simple.rs"

[[example]]
name = "test_transpose"
path = "examples/test_transpose.rs"

[[example]]
name = "test_v_matvec"
path = "examples/test_v_matvec.rs"
# Survey and tokenization examples; none of them declares required features.

[[example]]
name = "tinyllama_survey"
path = "examples/tinyllama_survey.rs"

[[example]]
name = "token_survey"
path = "examples/token_survey.rs"

[[example]]
name = "tokenization"
path = "examples/tokenization.rs"
# `trace_*` examples (first part). Only `trace_all_layers` and `trace_final`
# in this group require the `cuda` feature.

[[example]]
name = "trace_2plus2"
path = "examples/trace_2plus2.rs"

[[example]]
name = "trace_all_layers"
path = "examples/trace_all_layers.rs"
required-features = ["cuda"]

[[example]]
name = "trace_attention"
path = "examples/trace_attention.rs"

[[example]]
name = "trace_attention_scores"
path = "examples/trace_attention_scores.rs"

[[example]]
name = "trace_attn_weights"
path = "examples/trace_attn_weights.rs"

[[example]]
name = "trace_buggy_vs_ok"
path = "examples/trace_buggy_vs_ok.rs"

[[example]]
name = "trace_causal_attn"
path = "examples/trace_causal_attn.rs"

[[example]]
name = "trace_cpu_forward"
path = "examples/trace_cpu_forward.rs"

[[example]]
name = "trace_divergence"
path = "examples/trace_divergence.rs"

[[example]]
name = "trace_final"
path = "examples/trace_final.rs"
required-features = ["cuda"]

[[example]]
name = "trace_final_hidden"
path = "examples/trace_final_hidden.rs"

[[example]]
name = "trace_forward"
path = "examples/trace_forward.rs"
# `trace_*` examples (second part). Only `trace_pos1_divergence` in this group
# requires the `cuda` feature.

[[example]]
name = "trace_forward_detailed"
path = "examples/trace_forward_detailed.rs"

[[example]]
name = "trace_full_layer0"
path = "examples/trace_full_layer0.rs"

[[example]]
name = "trace_hidden_flow"
path = "examples/trace_hidden_flow.rs"

[[example]]
name = "trace_layer0_detailed"
path = "examples/trace_layer0_detailed.rs"

[[example]]
name = "trace_layer0_qkv"
path = "examples/trace_layer0_qkv.rs"

[[example]]
name = "trace_layer0_qkv_fixed"
path = "examples/trace_layer0_qkv_fixed.rs"

[[example]]
name = "trace_layer21"
path = "examples/trace_layer21.rs"

[[example]]
name = "trace_layer2_detail"
path = "examples/trace_layer2_detail.rs"

[[example]]
name = "trace_lm_head"
path = "examples/trace_lm_head.rs"

[[example]]
name = "trace_pos1_divergence"
path = "examples/trace_pos1_divergence.rs"
required-features = ["cuda"]

[[example]]
name = "trace_qkv_output"
path = "examples/trace_qkv_output.rs"

[[example]]
name = "trace_qkv_step"
path = "examples/trace_qkv_step.rs"

[[example]]
name = "trace_qwen_qkv"
path = "examples/trace_qwen_qkv.rs"

[[example]]
name = "trace_safetensors"
path = "examples/trace_safetensors.rs"

[[example]]
name = "trace_single_layer"
path = "examples/trace_single_layer.rs"

[[example]]
name = "trace_single_token_full"
path = "examples/trace_single_token_full.rs"

[[example]]
name = "trace_single_vs_multi"
path = "examples/trace_single_vs_multi.rs"
# `trueno_*` and `validated_tensors` examples; no required features declared.

[[example]]
name = "trueno_ab_test"
path = "examples/trueno_ab_test.rs"

[[example]]
name = "trueno_dot_test"
path = "examples/trueno_dot_test.rs"

[[example]]
name = "validated_tensors"
path = "examples/validated_tensors.rs"
# `verify_*` examples (first part). Only `verify_attention_kernel` in this
# group requires the `cuda` feature.

[[example]]
name = "verify_apr_q4k"
path = "examples/verify_apr_q4k.rs"

[[example]]
name = "verify_attention_kernel"
path = "examples/verify_attention_kernel.rs"
required-features = ["cuda"]

[[example]]
name = "verify_embed"
path = "examples/verify_embed.rs"

[[example]]
name = "verify_embedding"
path = "examples/verify_embedding.rs"

[[example]]
name = "verify_embeddings"
path = "examples/verify_embeddings.rs"

[[example]]
name = "verify_input_token"
path = "examples/verify_input_token.rs"

[[example]]
name = "verify_lm_head"
path = "examples/verify_lm_head.rs"

[[example]]
name = "verify_offsets"
path = "examples/verify_offsets.rs"

[[example]]
name = "verify_q4_0_correct"
path = "examples/verify_q4_0_correct.rs"

[[example]]
name = "verify_q4_0_full"
path = "examples/verify_q4_0_full.rs"

[[example]]
name = "verify_q4_0_matmul"
path = "examples/verify_q4_0_matmul.rs"
# `verify_*` examples (second part); no required features declared.

[[example]]
name = "verify_q4k_layout"
path = "examples/verify_q4k_layout.rs"

[[example]]
name = "verify_q8_0"
path = "examples/verify_q8_0.rs"

[[example]]
name = "verify_rmsnorm"
path = "examples/verify_rmsnorm.rs"

[[example]]
name = "verify_rope"
path = "examples/verify_rope.rs"

[[example]]
name = "verify_rope_pos0"
path = "examples/verify_rope_pos0.rs"

[[example]]
name = "verify_rope_type"
path = "examples/verify_rope_type.rs"

[[example]]
name = "verify_tensor_offsets"
path = "examples/verify_tensor_offsets.rs"

[[example]]
name = "verify_tinyllama_multitoken"
path = "examples/verify_tinyllama_multitoken.rs"

[[example]]
name = "verify_v_parallel_matvec"
path = "examples/verify_v_parallel_matvec.rs"
# Last two example targets; no required features declared.

[[example]]
name = "weight_check"
path = "examples/weight_check.rs"

[[example]]
name = "wine_lambda"
path = "examples/wine_lambda.rs"
# Optional; pulled in by the `alimentar-data` feature. Default features are
# disabled and only `local` and `shuffle` are requested.
[dependencies.alimentar]
version = "0.2"
default-features = false
features = ["local", "shuffle"]
optional = true

# Always-on dependency.
[dependencies.anyhow]
version = "1.0"
# Optional; pulled in by the `aprender-serve` feature.
# Capped below 1.0: an open-ended `>=0.27` would also accept any future
# SemVer-incompatible major release and could break builds without any change
# to this manifest. Every 0.x release from 0.27 upward is still accepted.
[dependencies.aprender]
version = ">=0.27, <1"
optional = true
# The three crates below are optional and are all enabled together by the
# `server` feature.

[dependencies.arc-swap]
optional = true
version = "1.7"

[dependencies.async-stream]
optional = true
version = "0.3"

[dependencies.axum]
optional = true
version = "0.7"
# Caret requirement, i.e. >=0.4.26, <0.5.0. The previous open-ended
# `>=0.4.26` would also have accepted a future SemVer-incompatible 0.5 or 1.0
# release.
[dependencies.chrono]
version = "0.4.26"
features = ["serde"]
# Optional; enabled by the `cli` and `tui` features.
[dependencies.clap]
version = "4"
features = ["derive"]
optional = true

# Optional; enabled by the `tui` feature.
[dependencies.crossterm]
version = "0.28"
optional = true

# Optional; enabled by the `server` feature.
[dependencies.futures]
version = "0.3"
optional = true

[dependencies.half]
version = "2.4"
features = ["std"]

# Optional; enabled by the `apr-compression` feature.
[dependencies.lz4_flex]
version = "0.11"
optional = true

[dependencies.memmap2]
version = "0.9"

[dependencies.minijinja]
version = "2.14"
features = ["loader"]

[dependencies.num-traits]
version = "0.2"

[dependencies.once_cell]
version = "1.21"

# Optional; enabled by the `registry` feature.
[dependencies.pacha]
version = "0.2"
optional = true

# Optional; enabled by the `cli` feature.
[dependencies.presentar-terminal]
version = "0.3.2"
optional = true

[dependencies.rand]
version = "0.8"

# Optional; enabled by the `tui` feature.
[dependencies.ratatui]
version = "0.29"
optional = true
[dependencies.rayon]
version = "1.10"

# Optional; enabled by the `bench-http` feature.
[dependencies.reqwest]
version = "0.11"
features = ["json", "blocking"]
optional = true

[dependencies.serde]
version = "1"
features = ["derive"]

[dependencies.serde_json]
version = "1"

[dependencies.smallvec]
version = "1.13"

[dependencies.thiserror]
version = "1.0"

# tokio, tokio-stream and tower are optional and enabled by the `server`
# feature.
[dependencies.tokio]
version = "1"
features = ["rt-multi-thread", "macros"]
optional = true

[dependencies.tokio-stream]
version = "0.1"
optional = true

[dependencies.tower]
version = "0.4"
features = ["util"]
optional = true

# Optional; enabled by the `cuda` feature.
[dependencies.tracing]
version = "0.1"
optional = true
# Always-on dependency (not marked optional).
# NOTE(review): trueno's `gpu` feature is requested here unconditionally, while
# the crate-level `gpu` feature is declared as ["trueno/gpu"]. As written,
# toggling the crate's `gpu` feature (e.g. via --no-default-features) does not
# change which trueno features are compiled in. Confirm this is intentional.
[dependencies.trueno]
version = "0.16"
features = ["gpu"]
# Optional; enabled by the `kv-cache` feature.
[dependencies.trueno-db]
version = "0.3"
optional = true

# Optional; enabled by the `cuda` feature.
[dependencies.trueno-gpu]
version = "0.4"
features = ["cuda"]
optional = true

[dependencies.trueno-quant]
version = "0.1"

# Optional; enabled by the `visualization` feature.
[dependencies.trueno-viz]
version = "0.2"
features = ["terminal"]
optional = true

# Optional; enabled by the `lambda` and `tui` features.
[dependencies.ureq]
version = "2"
features = ["json"]
optional = true

[dependencies.uuid]
version = "1"
features = ["v4", "serde"]

# Optional; enabled by the `apr-compression` feature.
[dependencies.zstd]
version = "0.13"
optional = true
# Development-only dependencies (first part).

[dev-dependencies.approx]
version = "0.5"

# Built without arrow's default features.
[dev-dependencies.arrow]
version = "53"
default-features = false

[dev-dependencies.assert_cmd]
version = "2.0"

[dev-dependencies.comfy-table]
version = "6"

[dev-dependencies.console]
version = "0.15"

[dev-dependencies.criterion]
version = "0.5"
features = ["html_reports"]

[dev-dependencies.glob]
version = "0.3"

[dev-dependencies.http-body-util]
version = "0.1"

[dev-dependencies.hyper]
version = "1.4"
features = ["full"]

[dev-dependencies.indicatif]
version = "0.17"
# Development-only dependencies (second part).

[dev-dependencies.jugar-probar]
version = "0.4"
features = ["tui", "gpu"]

[dev-dependencies.mime]
version = "0.3"

[dev-dependencies.predicates]
version = "3.0"

[dev-dependencies.proptest]
version = "1.4"

# Same version and features as the optional [dependencies.reqwest] entry, but
# listed here as a non-optional dev-dependency.
[dev-dependencies.reqwest]
version = "0.11"
features = ["json", "blocking"]

[dev-dependencies.serial_test]
version = "3"

[dev-dependencies.tempfile]
version = "3"

[dev-dependencies.trueno-cuda-edge]
version = "0.1"
# Dependencies of the build script (`build = "build.rs"` in [package]).

[build-dependencies.serde]
features = ["derive"]
version = "1"

[build-dependencies.serde_yaml_ng]
version = "0.10"
# Platform-specific dependencies.

# On wasm32 targets uuid is requested with the `js` feature in addition to the
# `v4` and `serde` features that [dependencies.uuid] already lists.
[target.'cfg(target_arch = "wasm32")'.dependencies.uuid]
version = "1"
features = ["v4", "serde", "js"]

# libc is only a dependency on unix targets.
[target.'cfg(unix)'.dependencies.libc]
version = "0.2"
# Clippy configuration.
[lints.clippy]
# Lint groups use priority -1 so that the individual lint levels below
# (default priority 0) take precedence over the group setting.
all = { level = "warn", priority = -1 }
pedantic = { level = "warn", priority = -1 }
# Individual lints set to warn.
checked_conversions = "warn"
redundant_clone = "warn"
unwrap_used = "warn"
# Individual lints that are allowed.
expect_used = "allow"
missing_errors_doc = "allow"
missing_panics_doc = "allow"
module_name_repetitions = "allow"
must_use_candidate = "allow"
similar_names = "allow"
unreadable_literal = "allow"
useless_vec = "allow"

# rustc lint configuration.
[lints.rust]
unreachable_pub = "warn"
unsafe_code = "allow"
unsafe_op_in_unsafe_fn = "warn"
# `check-cfg` declares `cfg(coverage)` as an expected cfg, so using it does not
# trigger the `unexpected_cfgs` warning.
unexpected_cfgs = { level = "warn", priority = 0, check-cfg = ["cfg(coverage)"] }
# Build profiles.

# Benchmarks reuse the release settings.
[profile.bench]
inherits = "release"

# Development builds: the crate itself is unoptimized with full debug info,
# and panics abort.
[profile.dev]
debug = 2
opt-level = 0
panic = "abort"

# Dependencies are compiled at opt-level 3 even in dev builds.
[profile.dev.package."*"]
opt-level = 3

# Release builds: fat LTO, a single codegen unit, panics abort, and `strip`
# enabled.
[profile.release]
codegen-units = 1
lto = "fat"
opt-level = 3
panic = "abort"
strip = true

# Test builds: optimized, with LTO off, no debug info, and incremental
# compilation on.
[profile.test]
codegen-units = 4
debug = 0
incremental = true
lto = false
opt-level = 3