1use std::path::{Path, PathBuf};
38use std::time::Duration;
39
/// Tunable limits and reporting options for a crawler.
///
/// Every field is public, so a value can be written as a struct literal or
/// adjusted through the `with_*` methods on this type.
/// [`CrawlerConfig::default`] derives the three concurrency limits from the
/// CPU count reported by `num_cpus::get()`.
#[derive(Clone, Debug)]
pub struct CrawlerConfig {
    /// Limit on simultaneous downloads. `validate` rejects `0`.
    pub max_concurrent_downloads: usize,
    /// Number of parser workers. `validate` rejects `0`.
    pub parser_workers: usize,
    /// Limit on simultaneous pipelines. `validate` rejects `0`.
    pub max_concurrent_pipelines: usize,
    /// Channel capacity (default `1000`). Not checked by `validate`.
    // NOTE(review): which channels are sized by this value is not visible in this file.
    pub channel_capacity: usize,
    /// Whether live statistics are switched on (default `false`).
    pub live_stats: bool,
    /// Interval associated with live statistics (default 50 ms).
    /// `validate` rejects a zero duration.
    pub live_stats_interval: Duration,
}
59
60impl Default for CrawlerConfig {
61 fn default() -> Self {
62 CrawlerConfig {
63 max_concurrent_downloads: num_cpus::get().max(16),
64 parser_workers: num_cpus::get().clamp(4, 16),
65 max_concurrent_pipelines: num_cpus::get().min(8),
66 channel_capacity: 1000,
67 live_stats: false,
68 live_stats_interval: Duration::from_millis(50),
69 }
70 }
71}
72
73impl CrawlerConfig {
74 pub fn new() -> Self {
76 Self::default()
77 }
78
79 pub fn with_max_concurrent_downloads(mut self, limit: usize) -> Self {
81 self.max_concurrent_downloads = limit;
82 self
83 }
84
85 pub fn with_parser_workers(mut self, count: usize) -> Self {
87 self.parser_workers = count;
88 self
89 }
90
91 pub fn with_max_concurrent_pipelines(mut self, limit: usize) -> Self {
93 self.max_concurrent_pipelines = limit;
94 self
95 }
96
97 pub fn with_channel_capacity(mut self, capacity: usize) -> Self {
99 self.channel_capacity = capacity;
100 self
101 }
102
103 pub fn with_live_stats(mut self, enabled: bool) -> Self {
105 self.live_stats = enabled;
106 self
107 }
108
109 pub fn with_live_stats_interval(mut self, interval: Duration) -> Self {
111 self.live_stats_interval = interval;
112 self
113 }
114
115 pub fn validate(&self) -> Result<(), String> {
117 if self.max_concurrent_downloads == 0 {
118 return Err("max_concurrent_downloads must be greater than 0".to_string());
119 }
120 if self.parser_workers == 0 {
121 return Err("parser_workers must be greater than 0".to_string());
122 }
123 if self.max_concurrent_pipelines == 0 {
124 return Err("max_concurrent_pipelines must be greater than 0".to_string());
125 }
126 if self.live_stats_interval.is_zero() {
127 return Err("live_stats_interval must be greater than 0".to_string());
128 }
129 Ok(())
130 }
131}
132
/// Checkpoint settings: an optional file path and an optional interval.
///
/// The derived `Default` leaves both fields `None`; `is_enabled` reports
/// `true` only once `path` is set.
#[derive(Clone, Debug, Default)]
pub struct CheckpointConfig {
    /// Checkpoint location; `None` means checkpointing is not enabled.
    pub path: Option<PathBuf>,
    /// Optional interval between checkpoints.
    // NOTE(review): what happens when a path is set but the interval is `None`
    // is decided by code outside this file.
    pub interval: Option<Duration>,
}
144
145impl CheckpointConfig {
146 pub fn new() -> Self {
148 Self::default()
149 }
150
151 pub fn builder() -> CheckpointConfigBuilder {
153 CheckpointConfigBuilder::default()
154 }
155
156 pub fn with_path<P: AsRef<Path>>(mut self, path: P) -> Self {
158 self.path = Some(path.as_ref().to_path_buf());
159 self
160 }
161
162 pub fn with_interval(mut self, interval: Duration) -> Self {
164 self.interval = Some(interval);
165 self
166 }
167
168 pub fn is_enabled(&self) -> bool {
170 self.path.is_some()
171 }
172}
173
/// Step-by-step builder for [`CheckpointConfig`].
///
/// Starts with both values unset (derived `Default`); `build` copies them
/// into the finished config unchanged.
#[derive(Default, Debug)]
pub struct CheckpointConfigBuilder {
    /// Path to hand to the built config, if one was supplied.
    path: Option<PathBuf>,
    /// Interval to hand to the built config, if one was supplied.
    interval: Option<Duration>,
}
180
181impl CheckpointConfigBuilder {
182 pub fn new() -> Self {
184 Self::default()
185 }
186
187 pub fn path<P: AsRef<Path>>(mut self, path: P) -> Self {
189 self.path = Some(path.as_ref().to_path_buf());
190 self
191 }
192
193 pub fn interval(mut self, interval: Duration) -> Self {
195 self.interval = Some(interval);
196 self
197 }
198
199 pub fn build(self) -> CheckpointConfig {
201 CheckpointConfig {
202 path: self.path,
203 interval: self.interval,
204 }
205 }
206}
207
/// Settings for the parser stage.
#[derive(Clone, Debug)]
pub struct ParserConfig {
    /// Number of parser workers; the default is the CPU count clamped to `4..=16`.
    pub worker_count: usize,
    /// Queue capacity; the default is `100`.
    // NOTE(review): the queue this applies to is not visible in this file.
    pub queue_capacity: usize,
}
218
219impl Default for ParserConfig {
220 fn default() -> Self {
221 ParserConfig {
222 worker_count: num_cpus::get().clamp(4, 16),
223 queue_capacity: 100,
224 }
225 }
226}
227
228impl ParserConfig {
229 pub fn new() -> Self {
231 Self::default()
232 }
233
234 pub fn with_worker_count(mut self, count: usize) -> Self {
236 self.worker_count = count;
237 self
238 }
239
240 pub fn with_queue_capacity(mut self, capacity: usize) -> Self {
242 self.queue_capacity = capacity;
243 self
244 }
245}
246
/// Settings for the downloader stage.
#[derive(Clone, Debug)]
pub struct DownloaderConfig {
    /// Limit on simultaneous downloads; the default is the CPU count, but never below 16.
    pub max_concurrent: usize,
    /// Backpressure threshold; the default is twice the default `max_concurrent`.
    // NOTE(review): how the threshold is applied is not visible in this file.
    pub backpressure_threshold: usize,
}
257
258impl Default for DownloaderConfig {
259 fn default() -> Self {
260 let max_concurrent = num_cpus::get().max(16);
261 DownloaderConfig {
262 max_concurrent,
263 backpressure_threshold: max_concurrent * 2,
264 }
265 }
266}
267
268impl DownloaderConfig {
269 pub fn new() -> Self {
271 Self::default()
272 }
273
274 pub fn with_max_concurrent(mut self, limit: usize) -> Self {
276 self.max_concurrent = limit;
277 self
278 }
279
280 pub fn with_backpressure_threshold(mut self, threshold: usize) -> Self {
282 self.backpressure_threshold = threshold;
283 self
284 }
285}
286
/// Settings for the item-processing stage.
#[derive(Clone, Debug)]
pub struct ItemProcessorConfig {
    /// Concurrency limit; the default is the CPU count, but never above 8.
    pub max_concurrent: usize,
}
295
296impl Default for ItemProcessorConfig {
297 fn default() -> Self {
298 ItemProcessorConfig {
299 max_concurrent: num_cpus::get().min(8),
300 }
301 }
302}
303
304impl ItemProcessorConfig {
305 pub fn new() -> Self {
307 Self::default()
308 }
309
310 pub fn with_max_concurrent(mut self, limit: usize) -> Self {
312 self.max_concurrent = limit;
313 self
314 }
315}
316
#[cfg(test)]
mod tests {
    use super::*;

    /// Every numeric default must be non-zero.
    #[test]
    fn test_crawler_config_default() {
        let defaults = CrawlerConfig::default();
        assert_ne!(defaults.max_concurrent_downloads, 0);
        assert_ne!(defaults.parser_workers, 0);
        assert_ne!(defaults.max_concurrent_pipelines, 0);
        assert_ne!(defaults.channel_capacity, 0);
    }

    /// Each `with_*` call stores exactly the value it was given.
    #[test]
    fn test_crawler_config_builder() {
        let built = CrawlerConfig::new()
            .with_max_concurrent_downloads(20)
            .with_parser_workers(8)
            .with_max_concurrent_pipelines(4)
            .with_channel_capacity(500);

        let observed = (
            built.max_concurrent_downloads,
            built.parser_workers,
            built.max_concurrent_pipelines,
            built.channel_capacity,
        );
        assert_eq!(observed, (20, 8, 4, 500));
    }

    /// Defaults validate cleanly; a zero download limit is rejected.
    #[test]
    fn test_crawler_config_validation() {
        let accepted = CrawlerConfig::default().validate();
        assert!(accepted.is_ok());

        let rejected = CrawlerConfig::new()
            .with_max_concurrent_downloads(0)
            .validate();
        assert!(rejected.is_err());
    }

    /// A builder given both a path and an interval yields an enabled config.
    #[test]
    fn test_checkpoint_config_builder() {
        let built = CheckpointConfig::builder()
            .path("./test.checkpoint")
            .interval(Duration::from_secs(30))
            .build();

        assert!(built.path.is_some());
        assert!(built.interval.is_some());
        assert!(built.is_enabled());
    }

    /// A freshly created config has no path and is therefore disabled.
    #[test]
    fn test_checkpoint_config_disabled() {
        let fresh = CheckpointConfig::new();
        assert!(!fresh.is_enabled());
    }
}