[llm]
api_key = "sk-your-api-key-here"
[llm.summary]
model = "gpt-4o-mini"
endpoint = "https://api.openai.com/v1"
max_tokens = 200
temperature = 0.0
[llm.retrieval]
model = "gpt-4o"
endpoint = "https://api.openai.com/v1"
max_tokens = 100
temperature = 0.0
[llm.pilot]
model = "gpt-4o-mini"
endpoint = "https://api.openai.com/v1"
max_tokens = 300
temperature = 0.0
[llm.retry]
max_attempts = 3
initial_delay_ms = 500
max_delay_ms = 30000
multiplier = 2.0
retry_on_rate_limit = true
[llm.throttle]
max_concurrent_requests = 10
requests_per_minute = 500
enabled = true
semaphore_enabled = true
[llm.fallback]
enabled = true
models = ["gpt-4o-mini", "glm-4-flash"]
on_rate_limit = "retry_then_fallback"
on_timeout = "retry_then_fallback"
on_all_failed = "return_error"
[metrics]
enabled = true
storage_path = "./workspace/metrics"
retention_days = 30
[metrics.llm]
track_tokens = true
track_latency = true
track_cost = true
cost_per_1k_input_tokens = 0.00015
cost_per_1k_output_tokens = 0.0006
[metrics.pilot]
track_decisions = true
track_accuracy = true
track_feedback = true
[metrics.retrieval]
track_paths = true
track_scores = true
track_iterations = true
track_cache = true
[pilot]
mode = "Balanced"
guide_at_start = true
guide_at_backtrack = true
[pilot.budget]
max_tokens_per_query = 2000
max_tokens_per_call = 500
max_calls_per_query = 5
max_calls_per_level = 2
hard_limit = true
[pilot.intervention]
fork_threshold = 3
score_gap_threshold = 0.15
low_score_threshold = 0.3
max_interventions_per_level = 2
[pilot.feedback]
enabled = true
storage_path = "./workspace/feedback"
learning_rate = 0.1
min_samples_for_learning = 10
[retrieval]
model = "gpt-4o"
endpoint = "https://api.openai.com/v1"
top_k = 3
max_tokens = 1000
temperature = 0.0
[retrieval.search]
top_k = 5
beam_width = 3
max_iterations = 10
min_score = 0.1
[retrieval.sufficiency]
min_tokens = 500
target_tokens = 2000
max_tokens = 4000
min_content_length = 200
confidence_threshold = 0.7
[retrieval.cache]
max_entries = 1000
ttl_secs = 3600
[retrieval.strategy]
exploration_weight = 1.414
similarity_threshold = 0.5
high_similarity_threshold = 0.8
low_similarity_threshold = 0.3
[retrieval.content]
enabled = true
token_budget = 4000
min_relevance_score = 0.2
scoring_strategy = "hybrid"
output_format = "markdown"
include_scores = false
hierarchical_min_per_level = 0.1
deduplicate = true
dedup_threshold = 0.9
[retrieval.multiturn]
enabled = true
max_sub_queries = 3
decomposition_model = "gpt-4o-mini"
aggregation_strategy = "merge"
[retrieval.reference]
enabled = true
max_depth = 3
max_references = 10
follow_pages = true
follow_tables_figures = true
min_confidence = 0.5
[storage]
workspace_dir = "./workspace"
cache_size = 100
atomic_writes = true
file_lock = true
checksum_enabled = true
[storage.compression]
enabled = false
algorithm = "gzip"
level = 6
[indexer]
subsection_threshold = 300
max_segment_tokens = 3000
max_summary_tokens = 200
min_summary_tokens = 20