1use serde::Deserialize;
2
3#[derive(Debug, Clone, Deserialize)]
4pub struct AppConfig {
5 #[serde(default)]
6 pub server: ServerConfig,
7 #[serde(default)]
8 pub renderer: RendererConfig,
9 #[serde(default)]
10 pub crawler: CrawlerConfig,
11 #[serde(default)]
12 pub extraction: ExtractionConfig,
13 #[serde(default)]
14 pub auth: AuthConfig,
15}
16
17#[derive(Debug, Clone, Deserialize)]
18pub struct ServerConfig {
19 #[serde(default = "default_host")]
20 pub host: String,
21 #[serde(default = "default_port")]
22 pub port: u16,
23 #[serde(default = "default_request_timeout")]
24 pub request_timeout_secs: u64,
25}
26
27impl Default for ServerConfig {
28 fn default() -> Self {
29 Self {
30 host: default_host(),
31 port: default_port(),
32 request_timeout_secs: default_request_timeout(),
33 }
34 }
35}
36
/// Default value for `ServerConfig::host`.
fn default_host() -> String {
    String::from("0.0.0.0")
}
/// Default value for `ServerConfig::port`.
fn default_port() -> u16 {
    const PORT: u16 = 3000;
    PORT
}
/// Default value for `ServerConfig::request_timeout_secs`.
fn default_request_timeout() -> u64 {
    const TIMEOUT_SECS: u64 = 60;
    TIMEOUT_SECS
}
46
47#[derive(Debug, Clone, Deserialize)]
48pub struct RendererConfig {
49 #[serde(default = "default_renderer_mode")]
50 pub mode: String,
51 #[serde(default = "default_page_timeout")]
52 pub page_timeout_ms: u64,
53 #[serde(default = "default_pool_size")]
54 pub pool_size: usize,
55 #[serde(default)]
56 pub lightpanda: Option<CdpEndpoint>,
57 #[serde(default)]
58 pub playwright: Option<CdpEndpoint>,
59 #[serde(default)]
60 pub chrome: Option<CdpEndpoint>,
61}
62
63impl Default for RendererConfig {
64 fn default() -> Self {
65 Self {
66 mode: default_renderer_mode(),
67 page_timeout_ms: default_page_timeout(),
68 pool_size: default_pool_size(),
69 lightpanda: None,
70 playwright: None,
71 chrome: None,
72 }
73 }
74}
75
/// Default value for `RendererConfig::mode`.
fn default_renderer_mode() -> String {
    String::from("auto")
}
/// Default value for `RendererConfig::page_timeout_ms`.
fn default_page_timeout() -> u64 {
    const TIMEOUT_MS: u64 = 30_000;
    TIMEOUT_MS
}
/// Default value for `RendererConfig::pool_size`.
fn default_pool_size() -> usize {
    const POOL_SIZE: usize = 4;
    POOL_SIZE
}
85
86#[derive(Debug, Clone, Deserialize)]
87pub struct CdpEndpoint {
88 pub ws_url: String,
89}
90
91#[derive(Debug, Clone, Deserialize)]
92pub struct CrawlerConfig {
93 #[serde(default = "default_concurrency")]
94 pub max_concurrency: usize,
95 #[serde(default = "default_rps")]
96 pub requests_per_second: f64,
97 #[serde(default = "default_true")]
98 pub respect_robots_txt: bool,
99 #[serde(default = "default_ua")]
100 pub user_agent: String,
101 #[serde(default = "default_depth")]
102 pub default_max_depth: u32,
103 #[serde(default = "default_max_pages")]
104 pub default_max_pages: u32,
105 #[serde(default)]
107 pub proxy: Option<String>,
108 #[serde(default = "default_job_ttl")]
110 pub job_ttl_secs: u64,
111}
112
113impl Default for CrawlerConfig {
114 fn default() -> Self {
115 Self {
116 max_concurrency: default_concurrency(),
117 requests_per_second: default_rps(),
118 respect_robots_txt: true,
119 user_agent: default_ua(),
120 default_max_depth: default_depth(),
121 default_max_pages: default_max_pages(),
122 proxy: None,
123 job_ttl_secs: default_job_ttl(),
124 }
125 }
126}
127
/// Default value for `CrawlerConfig::max_concurrency`.
fn default_concurrency() -> usize {
    const CONCURRENCY: usize = 10;
    CONCURRENCY
}
/// Default value for `CrawlerConfig::requests_per_second`.
fn default_rps() -> f64 {
    const RPS: f64 = 10.0;
    RPS
}
/// Serde default helper that yields `true`; used for
/// `CrawlerConfig::respect_robots_txt`.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// Default value for `CrawlerConfig::user_agent`.
fn default_ua() -> String {
    String::from("CRW/0.1")
}
/// Default value for `CrawlerConfig::default_max_depth`.
fn default_depth() -> u32 {
    const DEPTH: u32 = 2;
    DEPTH
}
/// Default value for `CrawlerConfig::default_max_pages`.
fn default_max_pages() -> u32 {
    const MAX_PAGES: u32 = 100;
    MAX_PAGES
}
/// Default value for `CrawlerConfig::job_ttl_secs`.
fn default_job_ttl() -> u64 {
    const TTL_SECS: u64 = 3_600;
    TTL_SECS
}
149
150#[derive(Debug, Clone, Deserialize)]
151pub struct ExtractionConfig {
152 #[serde(default = "default_format")]
153 pub default_format: String,
154 #[serde(default = "default_true_ext")]
155 pub only_main_content: bool,
156 #[serde(default)]
157 pub llm: Option<LlmConfig>,
158}
159
160impl Default for ExtractionConfig {
161 fn default() -> Self {
162 Self {
163 default_format: default_format(),
164 only_main_content: true,
165 llm: None,
166 }
167 }
168}
169
170#[derive(Debug, Clone, Deserialize)]
171pub struct LlmConfig {
172 #[serde(default = "default_llm_provider")]
173 pub provider: String,
174 pub api_key: String,
175 #[serde(default = "default_llm_model")]
176 pub model: String,
177 #[serde(default)]
178 pub base_url: Option<String>,
179 #[serde(default = "default_llm_max_tokens")]
180 pub max_tokens: u32,
181}
182
/// Default value for `LlmConfig::provider`.
fn default_llm_provider() -> String {
    String::from("anthropic")
}
/// Default value for `LlmConfig::model`.
fn default_llm_model() -> String {
    String::from("claude-sonnet-4-20250514")
}
/// Default value for `LlmConfig::max_tokens`.
fn default_llm_max_tokens() -> u32 {
    const MAX_TOKENS: u32 = 4_096;
    MAX_TOKENS
}
192
/// Default value for `ExtractionConfig::default_format`.
fn default_format() -> String {
    String::from("markdown")
}
/// Serde default helper that yields `true`; used for
/// `ExtractionConfig::only_main_content`.
fn default_true_ext() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
199
200#[derive(Debug, Clone, Default, Deserialize)]
201pub struct AuthConfig {
202 #[serde(default)]
203 pub api_keys: Vec<String>,
204}
205
206impl AppConfig {
207 pub fn load() -> Result<Self, config::ConfigError> {
210 let mut builder = config::Config::builder()
211 .add_source(config::File::with_name("config.default").required(false));
212
213 if let Ok(extra) = std::env::var("CRW_CONFIG") {
215 builder = builder.add_source(config::File::with_name(&extra).required(true));
216 } else {
217 builder = builder.add_source(config::File::with_name("config.local").required(false));
218 }
219
220 let cfg = builder
221 .add_source(
222 config::Environment::with_prefix("CRW")
223 .separator("__")
224 .try_parsing(true),
225 )
226 .build()?;
227 cfg.try_deserialize()
228 }
229}