# Vectorless Configuration Example
# Copy this file to config.toml and fill in your API keys
# NOTE(review): the table and key names in this file were stripped by a bad
# extraction and have been reconstructed from the surrounding comments —
# verify each name against the application's config schema before use.
[indexing]
# Word count threshold for splitting sections into subsections
split_word_threshold = 300
# Maximum tokens to send in a single segmentation request
segmentation_max_tokens = 3000
# Maximum tokens for each summary
summary_max_tokens = 200
[summary]
# API key - get from your provider
# api_key = "sk-..."
# API endpoint
# OpenAI: https://api.openai.com/v1
# ZAI General: https://api.z.ai/api/paas/v4
# ZAI Coding: https://api.z.ai/api/coding/paas/v4
base_url = "https://api.openai.com/v1"
# Model for summarization (use cheaper models for indexing)
model = "gpt-4o-mini"
# Maximum tokens for summary generation
max_tokens = 200
# Temperature for summary generation
temperature = 0.0
[retrieval]
# API key (optional, defaults to summary.api_key)
# api_key = "sk-..."
# API endpoint for retrieval
base_url = "https://api.openai.com/v1"
# Model for retrieval navigation (use smarter models for better results)
model = "gpt-4o"
# Retriever type: llm_navigate, beam_search, mcts, multi_doc, hybrid
retriever_type = "llm_navigate"
# Number of top results to return
top_k = 3
# Maximum tokens for retrieval context
max_tokens = 1000
# Temperature for retrieval
temperature = 0.0
[storage]
# Workspace directory for persisted documents
#
# Structure:
# workspace/
# ├── _meta.json       # Lightweight index
# ├── {doc_id_1}.json  # Document 1
# └── {doc_id_2}.json  # Document 2
workspace_dir = "./workspace"
[concurrency]
# Maximum concurrent LLM API calls
# This limits how many requests can be in-flight at the same time
max_concurrent_requests = 10
# Rate limit: requests per minute
# This is a soft limit using token bucket algorithm
requests_per_minute = 500
# Enable rate limiting (token bucket)
enable_rate_limit = true
# Enable semaphore-based concurrency limiting
enable_concurrency_limit = true
[fallback]
# Enable graceful degradation when LLM calls fail
enabled = true
# Fallback models in priority order
# When primary model fails, system tries these in order
models = ["gpt-4o-mini", "glm-4-flash"]
# Fallback endpoints (optional)
# When primary endpoint fails, system tries these in order
# endpoints = [
#     "https://api.openai.com/v1",
#     "https://api.z.ai/api/paas/v4"
# ]
# Behavior on rate limit error (429)
# Options: retry, fallback, retry_then_fallback, fail
on_rate_limit = "retry_then_fallback"
# Behavior on timeout error
# Options: retry, fallback, retry_then_fallback, fail
on_timeout = "retry_then_fallback"
# Behavior when all attempts fail
# Options: return_error, return_cache, return_default
# return_default requires a value field
on_failure = "return_error"