# OpenAI API backend configuration
[openai]
model = "gpt-3.5-turbo"
api_base = "https://api.openai.com/v1"
api_key = "your-api-key-here"
temperature = 0.7
max_tokens = 2048
# Local llama.cpp backend configuration
# Enable with: cargo run --features local
[local]
huggingface_repo = "unsloth/Qwen3-0.6B-GGUF"
model_file = "Qwen3-0.6B-Q4_K_M.gguf"
context_size = 2048
temperature = 0.7
max_tokens = 2048
# Candle backend configuration
# Enable with: cargo run --features candle
# Supports models: Qwen, Llama, Gemma, Mistral, and more
#
# IMPORTANT: Models are automatically loaded from your local HuggingFace cache first.
# If the model is not cached, it will be downloaded from HuggingFace Hub.
# HuggingFace cache location: ~/.cache/huggingface/
# Or set custom location with: export HF_HOME=/path/to/cache
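# For example, with the default repo configured below, a cached copy would
# typically live under the standard Hugging Face Hub cache layout
# (path shown for illustration):
#   ~/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct-GGUF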
[candle]
# Example: Qwen2-0.5B-Instruct-GGUF (quantized, lightweight, fast)
huggingface_repo = "Qwen/Qwen2-0.5B-Instruct-GGUF"
model_file = "qwen2-0_5b-instruct-q4_0.gguf"
context_size = 32768
temperature = 0.7
max_tokens = 2048
quantized = true
# Alternative model examples for Candle:
# Qwen2: huggingface_repo = "unsloth/Qwen2-7B"
# Qwen3: huggingface_repo = "unsloth/Qwen3-7B"
# Llama2: huggingface_repo = "meta-llama/Llama-2-7b"
# Gemma: huggingface_repo = "google/gemma-7b"
# Mistral: huggingface_repo = "mistralai/Mistral-7B"
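# A minimal sketch of pointing the [candle] section above at one of these
# alternatives (Mistral); the repo name is taken from the list, and the
# remaining values are illustrative assumptions rather than verified defaults:
# huggingface_repo = "mistralai/Mistral-7B"
# temperature = 0.7
# max_tokens = 2048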