# vectorless 0.1.6
# Hierarchical, reasoning-native document intelligence engine
# Documentation
# Vectorless Configuration Example
# Copy this file to config.toml and fill in your API keys

[indexer]
# Word count threshold for splitting sections into subsections.
# Sections longer than this many words get broken into smaller subsections.
subsection_threshold = 300

# Maximum tokens to send in a single segmentation request
max_segment_tokens = 3000

# Maximum tokens for each summary
# NOTE(review): presumably should stay in sync with summary.max_tokens — confirm
max_summary_tokens = 200

[summary]
# API key - get from your provider
# api_key = "sk-..."

# API endpoint (OpenAI-compatible base URL)
# OpenAI: https://api.openai.com/v1
# ZAI General: https://api.z.ai/api/paas/v4
# ZAI Coding: https://api.z.ai/api/coding/paas/v4
endpoint = "https://api.openai.com/v1"

# Model for summarization (use cheaper models for indexing)
model = "gpt-4o-mini"

# Maximum tokens for summary generation (output cap per summary request)
max_tokens = 200

# Temperature for summary generation; 0.0 keeps output deterministic
temperature = 0.0

[retrieval]
# API key (optional, defaults to summary.api_key)
# api_key = "sk-..."

# API endpoint for retrieval (OpenAI-compatible base URL)
endpoint = "https://api.openai.com/v1"

# Model for retrieval navigation (use smarter models for better results)
model = "gpt-4o"

# Retriever type: llm_navigate, beam_search, mcts, multi_doc, hybrid
retriever_type = "llm_navigate"

# Number of top results to return per query
top_k = 3

# Maximum tokens for retrieval context (output cap per retrieval request)
max_tokens = 1000

# Temperature for retrieval; 0.0 keeps navigation deterministic
temperature = 0.0

[storage]
# Workspace directory for persisted documents.
# Path is relative to the working directory unless absolute.
#
# Structure:
# workspace/
# ├── _meta.json           # Lightweight index
# ├── {doc_id_1}.json      # Document 1
# └── {doc_id_2}.json      # Document 2
workspace_dir = "./workspace"

[concurrency]
# Maximum concurrent LLM API calls.
# This limits how many requests can be in-flight at the same time
# (enforced only when semaphore_enabled = true below).
max_concurrent_requests = 10

# Rate limit: requests per minute.
# This is a soft limit using token bucket algorithm
# (enforced only when enabled = true below).
requests_per_minute = 500

# Enable rate limiting (token bucket); governs requests_per_minute
enabled = true

# Enable semaphore-based concurrency limiting; governs max_concurrent_requests
semaphore_enabled = true

[fallback]
# Enable graceful degradation when LLM calls fail
enabled = true

# Fallback models in priority order.
# When the primary model fails, the system tries these in order.
models = ["gpt-4o-mini", "glm-4-flash"]

# Fallback endpoints (optional).
# When the primary endpoint fails, the system tries these in order.
# NOTE(review): unclear whether endpoints pair positionally with models
# above or are tried independently — confirm against the consumer.
# endpoints = [
#     "https://api.openai.com/v1",
#     "https://api.z.ai/api/paas/v4"
# ]

# Behavior on rate limit error (429)
# Options: retry, fallback, retry_then_fallback, fail
on_rate_limit = "retry_then_fallback"

# Behavior on timeout error
# Options: retry, fallback, retry_then_fallback, fail
on_timeout = "retry_then_fallback"

# Behavior when all attempts fail
# Options: return_error, return_cache, return_default
# return_default requires a value field
on_all_failed = "return_error"