# OpenAI API backend configuration
[openai]
model = "gpt-3.5-turbo"
api_base = "https://api.openai.com/v1"
api_key = "your-api-key-here"
temperature = 0.7
max_tokens = 2048
# Local llama.cpp backend configuration
# Enable with: cargo run --features local
[local]
huggingface_repo = "unsloth/Qwen3-0.6B-GGUF"
model_file = "Qwen3-0.6B-Q4_K_M.gguf"
context_size = 2048
temperature = 0.7
max_tokens = 2048
# Candle backend configuration
# Enable with: cargo run --features candle
# Supports models: Qwen, Llama, Gemma, Mistral, and more
#
# IMPORTANT: Models are automatically loaded from your local HuggingFace cache first.
# If the model is not cached, it will be downloaded from HuggingFace Hub.
# HuggingFace cache location: ~/.cache/huggingface/
# Or set custom location with: export HF_HOME=/path/to/cache
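# For example, with the default repo configured below, a cached copy would
# typically live under the standard Hugging Face Hub cache layout
# (path shown for illustration):
#   ~/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct-GGUF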
[candle]
# Example: Qwen2-0.5B-Instruct-GGUF (quantized, lightweight, fast)
huggingface_repo = "Qwen/Qwen2-0.5B-Instruct-GGUF"
model_file = "qwen2-0_5b-instruct-q4_0.gguf"
context_size = 32768
temperature = 0.7
max_tokens = 2048
quantized = true
# Alternative model examples for Candle:
# Qwen2: huggingface_repo = "unsloth/Qwen2-7B"
# Qwen3: huggingface_repo = "unsloth/Qwen3-7B"
# Llama2: huggingface_repo = "meta-llama/Llama-2-7b"
# Gemma: huggingface_repo = "google/gemma-7b"
# Mistral: huggingface_repo = "mistralai/Mistral-7B"
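# A minimal sketch of pointing the [candle] section above at one of these
# alternatives (Mistral); the repo name is taken from the list, and the
# remaining values are illustrative assumptions rather than verified defaults:
# huggingface_repo = "mistralai/Mistral-7B"
# temperature = 0.7
# max_tokens = 2048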