1pub mod edit;
7pub mod provenance;
8
9use serde::{Deserialize, Serialize};
10use std::path::{Path, PathBuf};
11use std::time::Duration;
12use thiserror::Error;
13
/// Errors returned while loading a configuration file.
///
/// Every variant carries the display form of the path that was being loaded.
#[derive(Debug, Error)]
pub enum ConfigError {
    /// The file could not be read from disk.
    #[error("failed to read config at {path}: {source}")]
    Read {
        /// Path of the file that failed to load.
        path: String,
        /// The underlying I/O error.
        source: std::io::Error,
    },

    /// The file was read but could not be deserialized from TOML into [`Config`].
    #[error("failed to parse config at {path}: {source}")]
    Parse {
        /// Path of the file that failed to parse.
        path: String,
        /// The underlying TOML deserialization error.
        source: toml::de::Error,
    },

    /// The file parsed, but a value was rejected by the post-parse validation step.
    #[error("invalid config at {path}: {message}")]
    Invalid { path: String, message: String },
}
31
/// Root of the configuration file.
///
/// Every section is optional: a missing table falls back to that section's `Default`.
/// Unknown top-level keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct Config {
    /// `[fetch]` table; see [`FetchConfig`].
    #[serde(default)]
    pub fetch: FetchConfig,

    /// `[ssrf]` table; see [`SsrfConfig`].
    #[serde(default)]
    pub ssrf: SsrfConfig,

    /// `[debug]` table; see [`DebugConfig`].
    #[serde(default)]
    pub debug: DebugConfig,

    /// `[cache]` table; see [`CacheConfig`].
    #[serde(default)]
    pub cache: CacheConfig,

    /// `[tokenizer]` table; see [`TokenizerConfig`].
    #[serde(default)]
    pub tokenizer: TokenizerConfig,

    /// `[mcp]` table; see [`McpConfig`].
    #[serde(default)]
    pub mcp: McpConfig,

    /// `[output]` table; see [`OutputConfig`].
    #[serde(default)]
    pub output: OutputConfig,

    /// `[rate_limit]` table; see [`RateLimitConfig`].
    #[serde(default)]
    pub rate_limit: RateLimitConfig,

    /// `[robots]` table; see [`RobotsConfig`].
    #[serde(default)]
    pub robots: RobotsConfig,

    /// `[summarization]` table; see [`SummarizationConfig`].
    #[serde(default)]
    pub summarization: SummarizationConfig,

    /// `[backends.<key>]` tables, keyed by a string identifier; empty when absent.
    #[serde(default)]
    pub backends: std::collections::HashMap<String, BackendConfig>,

    /// `[headless]` table; see [`HeadlessConfig`].
    #[serde(default)]
    pub headless: HeadlessConfig,

    /// `[image_captions]` table; see [`ImageCaptionsConfig`].
    #[serde(default)]
    pub image_captions: ImageCaptionsConfig,

    /// `[captioners.<key>]` tables, kept in sorted key order (`BTreeMap`); empty when absent.
    #[serde(default)]
    pub captioners: std::collections::BTreeMap<String, CaptionerConfig>,

    /// `[prompt_injection]` table; see [`PromptInjectionConfig`].
    #[serde(default)]
    pub prompt_injection: PromptInjectionConfig,
}
80
/// Settings from the `[fetch]` table.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct FetchConfig {
    /// User-agent string; when omitted, the value of `default_user_agent()` is used.
    #[serde(default = "default_user_agent")]
    pub user_agent: String,

    /// Timeout in whole seconds; when omitted, the value of `default_timeout_secs()` is used.
    /// Config loading rejects `0`.
    #[serde(default = "default_timeout_secs")]
    pub timeout_secs: u64,
}
91
92impl Default for FetchConfig {
93 fn default() -> Self {
94 Self {
95 user_agent: default_user_agent(),
96 timeout_secs: default_timeout_secs(),
97 }
98 }
99}
100
101impl FetchConfig {
102 pub fn timeout(&self) -> Duration {
103 Duration::from_secs(self.timeout_secs)
104 }
105}
106
107impl Config {
108 pub fn apply_overrides(
115 &mut self,
116 rate_limit_rpm: Option<u32>,
117 per_host_concurrency: Option<u32>,
118 global_concurrency: Option<u32>,
119 max_retries: Option<u8>,
120 ignore_robots: bool,
121 ) {
122 if let Some(v) = rate_limit_rpm {
123 self.rate_limit.requests_per_minute_per_domain = v;
124 }
125 if let Some(v) = per_host_concurrency {
126 self.rate_limit.per_domain_concurrency = v.max(1);
127 }
128 if let Some(v) = global_concurrency {
129 self.rate_limit.global_concurrency = v.max(1);
130 }
131 if let Some(v) = max_retries {
132 self.rate_limit.max_retries = v;
133 }
134 if ignore_robots {
135 self.robots.respect = false;
136 }
137 }
138
139 #[cfg(any(test, feature = "test-loopback"))]
142 pub fn with_ssrf_level(mut self, level: &str) -> Self {
143 self.ssrf.level = level.to_string();
144 self
145 }
146}
147
148fn default_user_agent() -> String {
149 format!(
150 "Rover/{} (+https://github.com/aaronbassett/rover)",
151 env!("CARGO_PKG_VERSION")
152 )
153}
154
/// Default value for `fetch.timeout_secs`.
fn default_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 15;
    TIMEOUT_SECS
}
158
/// Settings from the `[cache]` table.
///
/// Durations are (de)serialized through `humantime_serde`, e.g. `"30m"` or `"1d"`.
/// Config loading requires `min_ttl <= default_ttl <= max_ttl`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct CacheConfig {
    /// Default TTL; falls back to `default_cache_default_ttl()`.
    #[serde(default = "default_cache_default_ttl", with = "humantime_serde")]
    pub default_ttl: Duration,

    /// Lower TTL bound; falls back to `default_cache_min_ttl()`.
    #[serde(default = "default_cache_min_ttl", with = "humantime_serde")]
    pub min_ttl: Duration,

    /// Upper TTL bound; falls back to `default_cache_max_ttl()`.
    #[serde(default = "default_cache_max_ttl", with = "humantime_serde")]
    pub max_ttl: Duration,

    /// Stale-while-revalidate window; falls back to `default_cache_swr_window()`.
    #[serde(default = "default_cache_swr_window", with = "humantime_serde")]
    pub stale_while_revalidate_window: Duration,

    /// Defaults to `false`. NOTE(review): presumably overrides `Cache-Control: no-store`
    /// for every domain; confirm against the cache implementation.
    #[serde(default)]
    pub override_no_store: bool,

    /// Domain list; entries are ASCII-lowercased during config loading. Defaults to empty.
    #[serde(default)]
    pub override_no_store_domains: Vec<String>,

    /// Defaults to `false`. NOTE(review): presumably keeps the raw HTML alongside the
    /// processed entry; confirm against the cache implementation.
    #[serde(default)]
    pub store_raw_html: bool,
}
193
194impl Default for CacheConfig {
195 fn default() -> Self {
196 Self {
197 default_ttl: default_cache_default_ttl(),
198 min_ttl: default_cache_min_ttl(),
199 max_ttl: default_cache_max_ttl(),
200 stale_while_revalidate_window: default_cache_swr_window(),
201 override_no_store: false,
202 override_no_store_domains: vec![],
203 store_raw_html: false,
204 }
205 }
206}
207
/// Default for `cache.default_ttl`: fifteen minutes.
fn default_cache_default_ttl() -> Duration {
    const SECS_PER_MINUTE: u64 = 60;
    Duration::from_secs(15 * SECS_PER_MINUTE)
}
215
/// Default for `cache.min_ttl`: five minutes.
fn default_cache_min_ttl() -> Duration {
    const SECS_PER_MINUTE: u64 = 60;
    Duration::from_secs(5 * SECS_PER_MINUTE)
}
219
/// Default for `cache.max_ttl`: seven days.
fn default_cache_max_ttl() -> Duration {
    const SECS_PER_DAY: u64 = 86400;
    Duration::from_secs(7 * SECS_PER_DAY)
}
223
/// Default for `cache.stale_while_revalidate_window`: five minutes.
fn default_cache_swr_window() -> Duration {
    const SECS_PER_MINUTE: u64 = 60;
    Duration::from_secs(5 * SECS_PER_MINUTE)
}
227
/// Settings from the `[tokenizer]` table.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct TokenizerConfig {
    /// Tokenizer selected when the key is omitted; falls back to `default_tokenizer()`.
    #[serde(default = "default_tokenizer")]
    pub default: crate::tokenizer::Tokenizer,
}
236
237impl Default for TokenizerConfig {
238 fn default() -> Self {
239 Self {
240 default: default_tokenizer(),
241 }
242 }
243}
244
245fn default_tokenizer() -> crate::tokenizer::Tokenizer {
246 crate::tokenizer::Tokenizer::O200k
247}
248
/// Settings from the `[mcp]` table.
///
/// Durations are (de)serialized through `humantime_serde`, e.g. `"10s"` or `"2m"`.
/// Config loading rejects a zero value for either field.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct McpConfig {
    /// Heartbeat interval; falls back to `default_heartbeat_interval()`.
    #[serde(default = "default_heartbeat_interval", with = "humantime_serde")]
    pub heartbeat_interval: Duration,

    /// Reap threshold; falls back to `default_reap_threshold()`.
    #[serde(default = "default_reap_threshold", with = "humantime_serde")]
    pub reap_threshold: Duration,
}
260
261impl Default for McpConfig {
262 fn default() -> Self {
263 Self {
264 heartbeat_interval: default_heartbeat_interval(),
265 reap_threshold: default_reap_threshold(),
266 }
267 }
268}
269
/// Default for `mcp.heartbeat_interval`: five seconds.
fn default_heartbeat_interval() -> Duration {
    const HEARTBEAT_SECS: u64 = 5;
    Duration::from_secs(HEARTBEAT_SECS)
}
273
/// Default for `mcp.reap_threshold`: one minute.
fn default_reap_threshold() -> Duration {
    const REAP_SECS: u64 = 60;
    Duration::from_secs(REAP_SECS)
}
277
/// Settings from the `[output]` table.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct OutputConfig {
    /// Optional output directory; `None` when the key is omitted.
    #[serde(default)]
    pub dir: Option<std::path::PathBuf>,
}
287
/// Settings from the `[rate_limit]` table.
///
/// Durations are (de)serialized through `humantime_serde`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct RateLimitConfig {
    /// Requests per minute per domain; config loading requires `1..=6000`.
    #[serde(default = "default_rpm_per_domain")]
    pub requests_per_minute_per_domain: u32,

    /// Per-domain concurrency limit; config loading rejects `0`.
    #[serde(default = "default_per_domain_concurrency")]
    pub per_domain_concurrency: u32,

    /// Global concurrency limit; config loading rejects `0`.
    #[serde(default = "default_global_concurrency")]
    pub global_concurrency: u32,

    /// Maximum retry count; config loading rejects values above `10`.
    #[serde(default = "default_max_retries")]
    pub max_retries: u8,

    /// Initial backoff; config loading requires it to be `<= max_backoff`.
    #[serde(default = "default_initial_backoff", with = "humantime_serde")]
    pub initial_backoff: Duration,

    /// Largest backoff; falls back to `default_max_backoff()`.
    #[serde(default = "default_max_backoff", with = "humantime_serde")]
    pub max_backoff: Duration,

    /// Ceiling applied to retry-after waits (presumably the `Retry-After` header, to be
    /// confirmed); config loading rejects a zero duration.
    #[serde(default = "default_retry_after_ceiling", with = "humantime_serde")]
    pub retry_after_ceiling: Duration,

    /// Optional seed, `None` by default. NOTE(review): presumably makes backoff jitter
    /// deterministic; confirm against the retry code.
    #[serde(default)]
    pub jitter_seed: Option<u64>,

    /// Threshold in seconds; falls back to `default_deferred_threshold_secs()`.
    #[serde(default = "default_deferred_threshold_secs")]
    pub deferred_retry_threshold_secs: u64,
}
325
326impl Default for RateLimitConfig {
327 fn default() -> Self {
328 Self {
329 requests_per_minute_per_domain: default_rpm_per_domain(),
330 per_domain_concurrency: default_per_domain_concurrency(),
331 global_concurrency: default_global_concurrency(),
332 max_retries: default_max_retries(),
333 initial_backoff: default_initial_backoff(),
334 max_backoff: default_max_backoff(),
335 retry_after_ceiling: default_retry_after_ceiling(),
336 jitter_seed: None,
337 deferred_retry_threshold_secs: default_deferred_threshold_secs(),
338 }
339 }
340}
341
/// Default for `rate_limit.requests_per_minute_per_domain`.
fn default_rpm_per_domain() -> u32 {
    const REQUESTS_PER_MINUTE: u32 = 60;
    REQUESTS_PER_MINUTE
}
/// Default for `rate_limit.per_domain_concurrency`.
fn default_per_domain_concurrency() -> u32 {
    const PER_DOMAIN: u32 = 2;
    PER_DOMAIN
}
/// Default for `rate_limit.global_concurrency`.
fn default_global_concurrency() -> u32 {
    const GLOBAL: u32 = 8;
    GLOBAL
}
/// Default for `rate_limit.max_retries`.
fn default_max_retries() -> u8 {
    const RETRIES: u8 = 3;
    RETRIES
}
/// Default for `rate_limit.initial_backoff`: half a second.
fn default_initial_backoff() -> Duration {
    const INITIAL_BACKOFF_MS: u64 = 500;
    Duration::from_millis(INITIAL_BACKOFF_MS)
}
/// Default for `rate_limit.max_backoff`: thirty seconds.
fn default_max_backoff() -> Duration {
    const MAX_BACKOFF_SECS: u64 = 30;
    Duration::from_secs(MAX_BACKOFF_SECS)
}
/// Default for `rate_limit.retry_after_ceiling`: five minutes.
fn default_retry_after_ceiling() -> Duration {
    const SECS_PER_MINUTE: u64 = 60;
    Duration::from_secs(5 * SECS_PER_MINUTE)
}
/// Default for `rate_limit.deferred_retry_threshold_secs`.
fn default_deferred_threshold_secs() -> u64 {
    const THRESHOLD_SECS: u64 = 30;
    THRESHOLD_SECS
}
366
/// Settings from the `[robots]` table.
///
/// Durations are (de)serialized through `humantime_serde`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct RobotsConfig {
    /// Whether robots rules are respected; falls back to `default_respect()`.
    /// `Config::apply_overrides` can switch this off.
    #[serde(default = "default_respect")]
    pub respect: bool,

    /// Domain list; entries are ASCII-lowercased during config loading. Defaults to empty.
    #[serde(default)]
    pub ignore_domains: Vec<String>,

    /// Default TTL; falls back to `default_robots_ttl()`.
    #[serde(default = "default_robots_ttl", with = "humantime_serde")]
    pub default_ttl: Duration,

    /// Failure TTL; config loading requires it to be `<= default_ttl`.
    #[serde(default = "default_robots_failure_ttl", with = "humantime_serde")]
    pub failure_ttl: Duration,
}
388
389impl Default for RobotsConfig {
390 fn default() -> Self {
391 Self {
392 respect: default_respect(),
393 ignore_domains: Vec::new(),
394 default_ttl: default_robots_ttl(),
395 failure_ttl: default_robots_failure_ttl(),
396 }
397 }
398}
399
/// Default for `robots.respect`: robots rules are honored unless explicitly disabled.
///
/// The only override path (`ignore_robots` in `Config::apply_overrides`) can only turn
/// this off, so the default has to be `true` for that switch to have any effect.
fn default_respect() -> bool {
    true
}
/// Default for `robots.default_ttl`: twenty-four hours.
fn default_robots_ttl() -> Duration {
    const SECS_PER_HOUR: u64 = 3600;
    Duration::from_secs(24 * SECS_PER_HOUR)
}
/// Default for `robots.failure_ttl`: five minutes.
fn default_robots_failure_ttl() -> Duration {
    const SECS_PER_MINUTE: u64 = 60;
    Duration::from_secs(5 * SECS_PER_MINUTE)
}
413
/// Settings from the `[summarization]` table.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct SummarizationConfig {
    /// Backend identifier; falls back to `default_summarization_backend()`.
    /// NOTE(review): presumably a key into `Config::backends`; confirm against the caller.
    #[serde(default = "default_summarization_backend")]
    pub default_backend: String,

    /// Summarization mode name; falls back to `default_summarization_mode()`.
    #[serde(default = "default_summarization_mode")]
    pub default_mode: String,

    /// Summarization style name; falls back to `default_summarization_style()`.
    #[serde(default = "default_summarization_style")]
    pub default_style: String,

    /// Whether to fall back to extractive summarization; falls back to
    /// `default_summarization_fallback()`.
    #[serde(default = "default_summarization_fallback")]
    pub fallback_to_extractive: bool,

    /// `[summarization.tables]` sub-table; see [`TablesSummarizationConfig`].
    #[serde(default)]
    pub tables: TablesSummarizationConfig,
}
436
437impl Default for SummarizationConfig {
438 fn default() -> Self {
439 Self {
440 default_backend: default_summarization_backend(),
441 default_mode: default_summarization_mode(),
442 default_style: default_summarization_style(),
443 fallback_to_extractive: default_summarization_fallback(),
444 tables: TablesSummarizationConfig::default(),
445 }
446 }
447}
448
/// Default for `summarization.default_backend`.
fn default_summarization_backend() -> String {
    String::from("default")
}
/// Default for `summarization.default_mode`.
fn default_summarization_mode() -> String {
    String::from("abstractive")
}
/// Default for `summarization.default_style`.
fn default_summarization_style() -> String {
    String::from("prose")
}
/// Default for `summarization.fallback_to_extractive`: enabled.
fn default_summarization_fallback() -> bool {
    const FALLBACK_ENABLED: bool = true;
    FALLBACK_ENABLED
}
461
/// Settings from the `[summarization.tables]` sub-table.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct TablesSummarizationConfig {
    /// Target token count; falls back to `default_tables_target_tokens()`.
    #[serde(default = "default_tables_target_tokens")]
    pub target_tokens: usize,
    /// Focus instruction text; falls back to `default_tables_focus()`.
    #[serde(default = "default_tables_focus")]
    pub focus: String,
}
472
473impl Default for TablesSummarizationConfig {
474 fn default() -> Self {
475 Self {
476 target_tokens: default_tables_target_tokens(),
477 focus: default_tables_focus(),
478 }
479 }
480}
481
/// Default for `summarization.tables.target_tokens`.
fn default_tables_target_tokens() -> usize {
    const TARGET_TOKENS: usize = 150;
    TARGET_TOKENS
}
/// Default for `summarization.tables.focus`.
fn default_tables_focus() -> String {
    String::from("Describe what this table shows. Highlight any extreme values or notable rows.")
}
488
/// One entry of the `[backends.<key>]` tables.
///
/// `kind` is the only required key; every other key defaults to `None`.
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
#[serde(deny_unknown_fields)]
pub struct BackendConfig {
    /// Backend kind (required; no serde default).
    pub kind: String,
    /// Optional provider name.
    #[serde(default)]
    pub provider: Option<String>,
    /// Optional model name.
    #[serde(default)]
    pub model: Option<String>,
    /// Optional base URL.
    #[serde(default)]
    pub base_url: Option<String>,
    /// Optional value; presumably the name of the environment variable that holds the
    /// API key (TODO confirm against the backend code).
    #[serde(default)]
    pub api_key_env: Option<String>,
}
505
/// Settings from the `[headless]` table.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct HeadlessConfig {
    /// Concurrency limit; falls back to `default_headless_max_concurrent()`.
    #[serde(default = "default_headless_max_concurrent")]
    pub max_concurrent: usize,

    /// Chrome executable; empty string when omitted.
    /// NOTE(review): presumably an empty value means auto-detect; confirm.
    #[serde(default)]
    pub chrome_executable: String,

    /// Image blocking flag; falls back to `default_block_images()`.
    #[serde(default = "default_block_images")]
    pub block_images: bool,

    /// Font blocking flag; falls back to `default_block_fonts()`.
    #[serde(default = "default_block_fonts")]
    pub block_fonts: bool,

    /// Media blocking flag; falls back to `default_block_media()`.
    #[serde(default = "default_block_media")]
    pub block_media: bool,

    /// CSS blocking flag; `false` when omitted.
    #[serde(default)]
    pub block_css: bool,

    /// Third-party blocking flag; falls back to `default_block_third_party()`.
    #[serde(default = "default_block_third_party")]
    pub block_third_party: bool,

    /// Service-worker blocking flag; falls back to `default_block_service_workers()`.
    #[serde(default = "default_block_service_workers")]
    pub block_service_workers: bool,

    /// Wait strategy name; falls back to `default_headless_wait()`.
    #[serde(default = "default_headless_wait")]
    pub default_wait: String,

    /// Timeout in whole seconds; falls back to `default_headless_timeout_secs()`.
    #[serde(default = "default_headless_timeout_secs")]
    pub timeout_secs: u64,

    /// SPA auto-detection flag; falls back to `default_auto_detect_spa()`.
    #[serde(default = "default_auto_detect_spa")]
    pub auto_detect_spa: bool,
}
556
557impl HeadlessConfig {
558 pub fn timeout(&self) -> std::time::Duration {
560 std::time::Duration::from_secs(self.timeout_secs)
561 }
562}
563
564impl Default for HeadlessConfig {
565 fn default() -> Self {
566 Self {
567 max_concurrent: default_headless_max_concurrent(),
568 chrome_executable: String::new(),
569 block_images: default_block_images(),
570 block_fonts: default_block_fonts(),
571 block_media: default_block_media(),
572 block_css: false,
573 block_third_party: default_block_third_party(),
574 block_service_workers: default_block_service_workers(),
575 default_wait: default_headless_wait(),
576 timeout_secs: default_headless_timeout_secs(),
577 auto_detect_spa: default_auto_detect_spa(),
578 }
579 }
580}
581
/// Default for `headless.max_concurrent`.
fn default_headless_max_concurrent() -> usize {
    const MAX_CONCURRENT: usize = 4;
    MAX_CONCURRENT
}
585
/// Default for `headless.default_wait`.
fn default_headless_wait() -> String {
    String::from("domcontentloaded")
}
589
/// Default for `headless.timeout_secs`.
fn default_headless_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 15;
    TIMEOUT_SECS
}
593
/// Default for `headless.auto_detect_spa`: enabled.
fn default_auto_detect_spa() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
597
/// Default for `headless.block_images`: enabled.
fn default_block_images() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
601
/// Default for `headless.block_fonts`: enabled.
fn default_block_fonts() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
605
/// Default for `headless.block_media`: enabled.
fn default_block_media() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
609
/// Default for `headless.block_third_party`: enabled.
fn default_block_third_party() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
613
/// Default for `headless.block_service_workers`: enabled.
fn default_block_service_workers() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
617
/// Settings from the `[image_captions]` table.
///
/// The container-level `#[serde(default)]` means any omitted key takes its value from
/// this type's `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default, deny_unknown_fields)]
pub struct ImageCaptionsConfig {
    /// Optional default captioner identifier.
    /// NOTE(review): presumably a key into `Config::captioners`; confirm.
    pub default: Option<String>,
    /// Token limit.
    pub max_tokens: usize,
    /// Per-page limit.
    pub max_per_page: usize,
    /// Minimum width.
    pub min_width: u32,
    /// Minimum height.
    pub min_height: u32,
    /// Byte limit; accepts either a non-negative integer or a human-readable size string
    /// such as `"10MiB"` (parsed by `humanbytes_to_u64`).
    #[serde(deserialize_with = "humanbytes_to_u64")]
    pub max_bytes: u64,
    /// Concurrency limit.
    pub max_concurrent: usize,
}
631
632impl Default for ImageCaptionsConfig {
633 fn default() -> Self {
634 Self {
635 default: None,
636 max_tokens: 50,
637 max_per_page: 10,
638 min_width: 200,
639 min_height: 200,
640 max_bytes: 10 * 1024 * 1024,
641 max_concurrent: 2,
642 }
643 }
644}
645
/// One entry of the `[captioners.<key>]` tables.
///
/// The container-level `#[serde(default)]` makes every key optional: `kind` becomes an
/// empty string and the remaining fields `None` when omitted.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(default, deny_unknown_fields)]
pub struct CaptionerConfig {
    /// Captioner kind.
    pub kind: String,
    /// Optional provider name.
    pub provider: Option<String>,
    /// Optional model name.
    pub model: Option<String>,
    /// Optional base URL.
    pub base_url: Option<String>,
    /// Optional value; presumably the name of the environment variable that holds the
    /// API key (TODO confirm against the captioner code).
    pub api_key_env: Option<String>,
}
656
/// Parses a human-readable byte size such as `"4096"`, `"1.5 MB"` or `"10MiB"`.
///
/// A bare unsigned integer is taken as a byte count. Otherwise the input is split at the
/// first ASCII letter into a decimal number and a unit. Units are case-insensitive:
/// `B`, `K`/`KB`, `M`/`MB`, `G`/`GB` are powers of 1000 and `KiB`, `MiB`, `GiB` are
/// powers of 1024. Fractional results are truncated toward zero.
///
/// # Errors
///
/// Returns a message when the input has no unit and is not an integer, when the number
/// does not parse, is negative or not finite, when the unit is unknown, or when the
/// resulting size does not fit in a `u64`.
pub fn parse_human_bytes(s: &str) -> Result<u64, String> {
    let s = s.trim();
    if let Ok(n) = s.parse::<u64>() {
        return Ok(n);
    }
    let (num_str, unit) = s
        .find(|c: char| c.is_ascii_alphabetic())
        .map(|i| s.split_at(i))
        .ok_or_else(|| format!("invalid size: {s}"))?;
    let num: f64 = num_str
        .trim()
        .parse()
        .map_err(|_| format!("invalid size number: {num_str}"))?;
    // A float-to-integer `as` cast saturates, so without this check a negative size
    // would silently become 0 instead of being reported.
    if !num.is_finite() || num < 0.0 {
        return Err(format!("invalid size number: {num_str}"));
    }
    let mult: u64 = match unit.trim().to_ascii_uppercase().as_str() {
        "B" => 1,
        "K" | "KB" => 1_000,
        "KIB" => 1_024,
        "M" | "MB" => 1_000_000,
        "MIB" => 1_024 * 1_024,
        "G" | "GB" => 1_000_000_000,
        "GIB" => 1_024 * 1_024 * 1_024,
        other => return Err(format!("unknown size unit: {other}")),
    };
    let bytes = num * mult as f64;
    // `u64::MAX as f64` rounds up to 2^64, so anything at or above it cannot be
    // represented and would otherwise saturate to `u64::MAX`.
    if bytes >= u64::MAX as f64 {
        return Err(format!("size out of range: {s}"));
    }
    Ok(bytes as u64)
}
684
685fn humanbytes_to_u64<'de, D>(d: D) -> Result<u64, D::Error>
686where
687 D: serde::Deserializer<'de>,
688{
689 use serde::de::Error as _;
690 let v = toml::Value::deserialize(d)?;
691 match v {
692 toml::Value::Integer(n) if n >= 0 => Ok(n as u64),
693 toml::Value::String(s) => parse_human_bytes(&s).map_err(D::Error::custom),
694 other => Err(D::Error::custom(format!(
695 "expected integer bytes or humansize string, got {other:?}",
696 ))),
697 }
698}
699
/// Settings from the `[ssrf]` table.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct SsrfConfig {
    /// Protection level name; falls back to `default_ssrf_level()`.
    #[serde(default = "default_ssrf_level")]
    pub level: String,

    /// Project root path; falls back to `default_ssrf_project_root()`.
    #[serde(default = "default_ssrf_project_root")]
    pub project_root: std::path::PathBuf,
}
714
715impl Default for SsrfConfig {
716 fn default() -> Self {
717 Self {
718 level: default_ssrf_level(),
719 project_root: default_ssrf_project_root(),
720 }
721 }
722}
723
/// Default for `ssrf.level`.
fn default_ssrf_level() -> String {
    String::from("strict")
}
727
/// Default for `ssrf.project_root`: the relative path `.`.
fn default_ssrf_project_root() -> std::path::PathBuf {
    let current_dir = std::path::Path::new(".");
    current_dir.to_path_buf()
}
731
/// Settings from the `[prompt_injection]` table.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct PromptInjectionConfig {
    /// Level name; falls back to `default_pi_level()`.
    #[serde(default = "default_pi_level")]
    pub level: String,

    /// Model setting; falls back to `default_pi_model()`.
    #[serde(default = "default_pi_model")]
    pub model: String,

    /// Model threshold; falls back to `default_pi_model_threshold()`.
    #[serde(default = "default_pi_model_threshold")]
    pub model_threshold: f64,

    /// `[prompt_injection.allowlist]` sub-table; see [`PromptInjectionAllowlist`].
    #[serde(default)]
    pub allowlist: PromptInjectionAllowlist,

    /// `[prompt_injection.agent_overrides]` sub-table; see [`PromptInjectionOverrides`].
    #[serde(default)]
    pub agent_overrides: PromptInjectionOverrides,
}
754
755impl Default for PromptInjectionConfig {
756 fn default() -> Self {
757 Self {
758 level: default_pi_level(),
759 model: default_pi_model(),
760 model_threshold: default_pi_model_threshold(),
761 allowlist: PromptInjectionAllowlist::default(),
762 agent_overrides: PromptInjectionOverrides::default(),
763 }
764 }
765}
766
/// Settings from the `[prompt_injection.allowlist]` sub-table.
///
/// Each list defaults to empty.
/// NOTE(review): what the entries denote (domains, URLs, ...) is not visible here; confirm
/// against the consumer.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct PromptInjectionAllowlist {
    /// Allowlist entries for the `wrap` mechanism.
    #[serde(default)]
    pub wrap: Vec<String>,
    /// Allowlist entries for the `patterns` mechanism.
    #[serde(default)]
    pub patterns: Vec<String>,
    /// Allowlist entries for the `model` mechanism.
    #[serde(default)]
    pub model: Vec<String>,
}
779
/// Settings from the `[prompt_injection.agent_overrides]` sub-table.
///
/// Every flag defaults to `false`.
/// NOTE(review): presumably each flag allows an agent to override the matching setting;
/// confirm against the consumer.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct PromptInjectionOverrides {
    /// Flag for the `wrap` mechanism.
    #[serde(default)]
    pub wrap: bool,
    /// Flag for the `patterns` mechanism.
    #[serde(default)]
    pub patterns: bool,
    /// Flag for the `model` mechanism.
    #[serde(default)]
    pub model: bool,
    /// Flag for the `level` setting.
    #[serde(default)]
    pub level: bool,
}
794
/// Default for `prompt_injection.level`.
fn default_pi_level() -> String {
    String::from("moderate")
}
/// Default for `prompt_injection.model`.
fn default_pi_model() -> String {
    String::from("disabled")
}
/// Default for `prompt_injection.model_threshold`.
fn default_pi_model_threshold() -> f64 {
    const THRESHOLD: f64 = 0.9;
    THRESHOLD
}
804
/// Settings from the `[debug]` table.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct DebugConfig {
    /// HAR path; falls back to `default_debug_har_path()` (an empty string).
    /// NOTE(review): presumably an empty value disables HAR capture; confirm.
    #[serde(default = "default_debug_har_path")]
    pub har_path: String,

    /// HAR body cap in bytes; accepts a non-negative integer or a size string with a
    /// `KiB`/`MiB`/`GiB` suffix (parsed by `deserialize_humansize`).
    #[serde(
        default = "default_debug_har_body_cap",
        deserialize_with = "deserialize_humansize"
    )]
    pub har_body_cap: u64,

    /// Log level name; falls back to `default_debug_log_level()`.
    #[serde(default = "default_debug_log_level")]
    pub log_level: String,
}
826
827impl Default for DebugConfig {
828 fn default() -> Self {
829 Self {
830 har_path: default_debug_har_path(),
831 har_body_cap: default_debug_har_body_cap(),
832 log_level: default_debug_log_level(),
833 }
834 }
835}
836
/// Default for `debug.har_path`: an empty string.
fn default_debug_har_path() -> String {
    String::default()
}
840
/// Default for `debug.har_body_cap`: 64 KiB.
fn default_debug_har_body_cap() -> u64 {
    const KIB: u64 = 1024;
    64 * KIB
}
844
/// Default for `debug.log_level`.
fn default_debug_log_level() -> String {
    String::from("info")
}
848
849fn deserialize_humansize<'de, D>(deserializer: D) -> Result<u64, D::Error>
850where
851 D: serde::Deserializer<'de>,
852{
853 use serde::de::Error as _;
854 let v = toml::Value::deserialize(deserializer)?;
855 match v {
856 toml::Value::Integer(n) if n >= 0 => Ok(n as u64),
857 toml::Value::String(s) => parse_humansize(&s).map_err(D::Error::custom),
858 other => Err(D::Error::custom(format!(
859 "expected integer bytes or humansize string, got {other:?}",
860 ))),
861 }
862}
863
/// Parses a size string made of an unsigned integer and an optional binary suffix.
///
/// Accepted suffixes are none or `B` (bytes), `KiB`, `MiB` and `GiB`; matching is
/// case-sensitive. Whitespace around the number and the suffix is ignored.
///
/// # Errors
///
/// Returns a message when the numeric part is not an unsigned integer, when the suffix
/// is unknown, or when the resulting byte count does not fit in a `u64`.
fn parse_humansize(s: &str) -> Result<u64, String> {
    let s = s.trim();
    let (num_part, suffix) = s
        .find(|c: char| c.is_alphabetic())
        .map(|i| s.split_at(i))
        .unwrap_or((s, ""));
    let n: u64 = num_part
        .trim()
        .parse()
        .map_err(|_| format!("invalid number in `{s}`"))?;
    let mult: u64 = match suffix.trim() {
        "" | "B" => 1,
        "KiB" => 1024,
        "MiB" => 1024 * 1024,
        "GiB" => 1024 * 1024 * 1024,
        other => {
            return Err(format!(
                "unknown size suffix `{other}` (expected KiB|MiB|GiB)"
            ));
        }
    };
    // A plain `n * mult` panics in debug builds and wraps in release builds when the
    // product exceeds `u64::MAX`; report it as an error instead.
    n.checked_mul(mult)
        .ok_or_else(|| format!("size `{s}` does not fit in 64 bits"))
}
887
888pub fn load(path: Option<&Path>) -> Result<Config, ConfigError> {
891 let Some(path) = path else {
892 return Ok(Config::default());
893 };
894
895 let bytes = std::fs::read_to_string(path).map_err(|source| ConfigError::Read {
896 path: path.display().to_string(),
897 source,
898 })?;
899 let mut cfg: Config = toml::from_str(&bytes).map_err(|source| ConfigError::Parse {
900 path: path.display().to_string(),
901 source,
902 })?;
903 validate(&mut cfg).map_err(|message| ConfigError::Invalid {
904 path: path.display().to_string(),
905 message,
906 })?;
907 Ok(cfg)
908}
909
/// Builds the ordered list of config file locations to try.
///
/// When `rover_config_env` is set it is the only candidate. Otherwise the list is
/// `<config_dir>/rover/rover.toml` (only when `config_dir` is known) followed by
/// `rover.toml` relative to the working directory.
fn config_candidates_from(
    rover_config_env: Option<&str>,
    config_dir: Option<&Path>,
) -> Vec<PathBuf> {
    match rover_config_env {
        Some(explicit) => vec![PathBuf::from(explicit)],
        None => config_dir
            .map(|dir| dir.join("rover").join("rover.toml"))
            .into_iter()
            .chain(std::iter::once(PathBuf::from("rover.toml")))
            .collect(),
    }
}
930
931fn config_candidates() -> Vec<PathBuf> {
932 config_candidates_from(
933 std::env::var("ROVER_CONFIG").ok().as_deref(),
934 dirs::config_dir().as_deref(),
935 )
936}
937
938pub fn default_config_path() -> PathBuf {
942 config_candidates()
943 .into_iter()
944 .next()
945 .expect("config_candidates always yields at least one path")
946}
947
948pub fn resolve_existing_config_path() -> Option<PathBuf> {
955 config_candidates().into_iter().find(|p| p.is_file())
956}
957
958pub fn load_resolved(explicit: Option<&Path>) -> Result<Config, ConfigError> {
971 if let Some(path) = explicit {
972 tracing::debug!(path = %path.display(), "loading config from --config");
973 return load(Some(path));
974 }
975 match resolve_existing_config_path() {
976 Some(path) => {
977 tracing::debug!(path = %path.display(), "loading config from resolved default path");
978 load(Some(&path))
979 }
980 None => {
981 tracing::debug!("no config file found at any default path; using built-in defaults");
982 Ok(Config::default())
983 }
984 }
985}
986
/// Test-only helper that loads from `explicit` when given, else from `resolved_existing`,
/// else returns the built-in defaults. It takes the resolved path as a parameter instead
/// of reading the environment.
#[cfg(test)]
fn load_resolved_from(
    explicit: Option<&Path>,
    resolved_existing: Option<&Path>,
) -> Result<Config, ConfigError> {
    // The explicit path always wins over the resolved one.
    let chosen = explicit.or(resolved_existing);
    match chosen {
        Some(path) => load(Some(path)),
        None => Ok(Config::default()),
    }
}
1001
1002fn validate(cfg: &mut Config) -> Result<(), String> {
1003 if cfg.fetch.timeout_secs == 0 {
1004 return Err("fetch.timeout_secs must be > 0".to_string());
1005 }
1006 if cfg.cache.min_ttl > cfg.cache.default_ttl {
1007 return Err(format!(
1008 "cache.min_ttl ({:?}) must be <= cache.default_ttl ({:?})",
1009 cfg.cache.min_ttl, cfg.cache.default_ttl
1010 ));
1011 }
1012 if cfg.cache.default_ttl > cfg.cache.max_ttl {
1013 return Err(format!(
1014 "cache.default_ttl ({:?}) must be <= cache.max_ttl ({:?})",
1015 cfg.cache.default_ttl, cfg.cache.max_ttl
1016 ));
1017 }
1018 for d in &mut cfg.cache.override_no_store_domains {
1019 d.make_ascii_lowercase();
1020 }
1021 if cfg.mcp.heartbeat_interval.is_zero() {
1022 return Err("mcp.heartbeat_interval must be > 0".to_string());
1023 }
1024 if cfg.mcp.reap_threshold.is_zero() {
1025 return Err("mcp.reap_threshold must be > 0".to_string());
1026 }
1027
1028 if cfg.rate_limit.requests_per_minute_per_domain == 0 {
1030 return Err("rate_limit.requests_per_minute_per_domain must be > 0".to_string());
1031 }
1032 if cfg.rate_limit.requests_per_minute_per_domain > 6000 {
1033 return Err(format!(
1034 "rate_limit.requests_per_minute_per_domain ({}) exceeds sanity cap 6000 (100 req/s)",
1035 cfg.rate_limit.requests_per_minute_per_domain
1036 ));
1037 }
1038 if cfg.rate_limit.per_domain_concurrency == 0 {
1039 return Err("rate_limit.per_domain_concurrency must be > 0".to_string());
1040 }
1041 if cfg.rate_limit.global_concurrency == 0 {
1042 return Err("rate_limit.global_concurrency must be > 0".to_string());
1043 }
1044 if cfg.rate_limit.max_retries > 10 {
1045 return Err(format!(
1046 "rate_limit.max_retries ({}) exceeds sanity cap 10",
1047 cfg.rate_limit.max_retries
1048 ));
1049 }
1050 if cfg.rate_limit.initial_backoff > cfg.rate_limit.max_backoff {
1051 return Err(format!(
1052 "rate_limit.initial_backoff ({:?}) must be <= max_backoff ({:?})",
1053 cfg.rate_limit.initial_backoff, cfg.rate_limit.max_backoff
1054 ));
1055 }
1056 if cfg.rate_limit.retry_after_ceiling.is_zero() {
1057 return Err("rate_limit.retry_after_ceiling must be > 0".to_string());
1058 }
1059
1060 for d in &mut cfg.robots.ignore_domains {
1062 d.make_ascii_lowercase();
1063 }
1064 if cfg.robots.failure_ttl > cfg.robots.default_ttl {
1065 return Err(format!(
1066 "robots.failure_ttl ({:?}) must be <= robots.default_ttl ({:?})",
1067 cfg.robots.failure_ttl, cfg.robots.default_ttl
1068 ));
1069 }
1070
1071 Ok(())
1072}
1073
1074#[cfg(test)]
1075mod tests {
1076 use super::*;
1077 use std::io::Write;
1078
1079 #[test]
1080 fn apply_overrides_clamps_concurrency_minimum() {
1081 let mut cfg = Config::default();
1082 cfg.apply_overrides(None, Some(0), Some(0), None, false);
1083 assert_eq!(cfg.rate_limit.per_domain_concurrency, 1);
1084 assert_eq!(cfg.rate_limit.global_concurrency, 1);
1085 }
1086
1087 #[test]
1088 fn apply_overrides_leaves_unset_fields_untouched() {
1089 let mut cfg = Config::default();
1090 let baseline_rpm = cfg.rate_limit.requests_per_minute_per_domain;
1091 let baseline_retries = cfg.rate_limit.max_retries;
1092 let baseline_respect = cfg.robots.respect;
1093 cfg.apply_overrides(None, None, None, None, false);
1094 assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, baseline_rpm);
1095 assert_eq!(cfg.rate_limit.max_retries, baseline_retries);
1096 assert_eq!(cfg.robots.respect, baseline_respect);
1097 }
1098
1099 #[test]
1100 fn apply_overrides_disables_robots_when_requested() {
1101 let mut cfg = Config::default();
1102 cfg.robots.respect = true;
1105 cfg.apply_overrides(None, None, None, None, true);
1106 assert!(!cfg.robots.respect);
1107 }
1108
1109 #[test]
1110 fn apply_overrides_sets_explicit_values() {
1111 let mut cfg = Config::default();
1112 cfg.apply_overrides(Some(30), Some(4), Some(16), Some(5), false);
1113 assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, 30);
1114 assert_eq!(cfg.rate_limit.per_domain_concurrency, 4);
1115 assert_eq!(cfg.rate_limit.global_concurrency, 16);
1116 assert_eq!(cfg.rate_limit.max_retries, 5);
1117 }
1118
1119 #[test]
1120 fn default_config_has_sensible_values() {
1121 let cfg = Config::default();
1122 assert!(cfg.fetch.user_agent.starts_with("Rover/"));
1123 assert_eq!(cfg.fetch.timeout_secs, 15);
1124
1125 assert_eq!(cfg.cache.default_ttl, Duration::from_secs(15 * 60));
1127 assert_eq!(cfg.cache.min_ttl, Duration::from_secs(300));
1128 assert_eq!(cfg.cache.max_ttl, Duration::from_secs(7 * 86400));
1129 assert!(!cfg.cache.override_no_store);
1130 assert!(cfg.cache.override_no_store_domains.is_empty());
1131 assert!(!cfg.cache.store_raw_html);
1132 }
1133
1134 #[test]
1135 fn load_with_no_path_returns_default() {
1136 let cfg = load(None).unwrap();
1137 assert_eq!(cfg.fetch.timeout_secs, 15);
1138 }
1139
1140 #[test]
1141 fn load_from_file_overrides_defaults() {
1142 let mut file = tempfile::NamedTempFile::new().unwrap();
1143 writeln!(
1144 file,
1145 r#"
1146[fetch]
1147user_agent = "test-ua"
1148timeout_secs = 5
1149"#
1150 )
1151 .unwrap();
1152
1153 let cfg = load(Some(file.path())).unwrap();
1154 assert_eq!(cfg.fetch.user_agent, "test-ua");
1155 assert_eq!(cfg.fetch.timeout_secs, 5);
1156 }
1157
1158 #[test]
1159 fn load_missing_file_errors() {
1160 let result = load(Some(Path::new("/no/such/path/__rover_test__.toml")));
1161 assert!(matches!(result, Err(ConfigError::Read { .. })));
1162 }
1163
1164 #[test]
1165 fn load_malformed_toml_errors() {
1166 let mut file = tempfile::NamedTempFile::new().unwrap();
1167 writeln!(file, "not = valid = toml").unwrap();
1168 let result = load(Some(file.path()));
1169 assert!(matches!(result, Err(ConfigError::Parse { .. })));
1170 }
1171
1172 #[test]
1173 fn load_unknown_field_errors() {
1174 let mut file = tempfile::NamedTempFile::new().unwrap();
1175 writeln!(
1176 file,
1177 r#"
1178[fetch]
1179unknown_field = "x"
1180"#
1181 )
1182 .unwrap();
1183 let result = load(Some(file.path()));
1184 assert!(matches!(result, Err(ConfigError::Parse { .. })));
1185 }
1186
1187 #[test]
1188 fn load_unknown_field_in_cache_errors() {
1189 let mut file = tempfile::NamedTempFile::new().unwrap();
1190 writeln!(
1191 file,
1192 r#"
1193[cache]
1194unknown_field = "x"
1195"#
1196 )
1197 .unwrap();
1198 let result = load(Some(file.path()));
1199 assert!(matches!(result, Err(ConfigError::Parse { .. })));
1200 }
1201
1202 #[test]
1203 fn load_rejects_zero_timeout() {
1204 let mut file = tempfile::NamedTempFile::new().unwrap();
1205 writeln!(
1206 file,
1207 r#"
1208[fetch]
1209timeout_secs = 0
1210"#
1211 )
1212 .unwrap();
1213 let result = load(Some(file.path()));
1214 assert!(matches!(result, Err(ConfigError::Invalid { .. })));
1215 }
1216
1217 #[test]
1218 fn load_cache_overrides() {
1219 let mut file = tempfile::NamedTempFile::new().unwrap();
1220 writeln!(
1221 file,
1222 r#"
1223[cache]
1224default_ttl = "30m"
1225min_ttl = "1m"
1226max_ttl = "1d"
1227override_no_store = true
1228override_no_store_domains = ["docs.example.com"]
1229store_raw_html = true
1230"#
1231 )
1232 .unwrap();
1233
1234 let cfg = load(Some(file.path())).unwrap();
1235 assert_eq!(cfg.cache.default_ttl, Duration::from_secs(30 * 60));
1236 assert_eq!(cfg.cache.min_ttl, Duration::from_secs(60));
1237 assert_eq!(cfg.cache.max_ttl, Duration::from_secs(86400));
1238 assert!(cfg.cache.override_no_store);
1239 assert_eq!(
1240 cfg.cache.override_no_store_domains,
1241 vec!["docs.example.com".to_string()]
1242 );
1243 assert!(cfg.cache.store_raw_html);
1244 }
1245
1246 #[test]
1247 fn load_rejects_min_greater_than_default() {
1248 let mut file = tempfile::NamedTempFile::new().unwrap();
1249 writeln!(
1250 file,
1251 r#"
1252[cache]
1253default_ttl = "1m"
1254min_ttl = "10m"
1255"#
1256 )
1257 .unwrap();
1258 let result = load(Some(file.path()));
1259 assert!(matches!(result, Err(ConfigError::Invalid { .. })));
1260 }
1261
1262 #[test]
1263 fn load_rejects_default_greater_than_max() {
1264 let mut file = tempfile::NamedTempFile::new().unwrap();
1265 writeln!(
1266 file,
1267 r#"
1268[cache]
1269default_ttl = "10d"
1270max_ttl = "1d"
1271"#
1272 )
1273 .unwrap();
1274 let result = load(Some(file.path()));
1275 assert!(matches!(result, Err(ConfigError::Invalid { .. })));
1276 }
1277
1278 #[test]
1279 fn override_no_store_domains_normalized_to_lowercase() {
1280 let mut file = tempfile::NamedTempFile::new().unwrap();
1281 writeln!(
1282 file,
1283 r#"
1284[cache]
1285override_no_store_domains = ["DOCS.example.COM", "CDN.foo.com"]
1286"#
1287 )
1288 .unwrap();
1289 let cfg = load(Some(file.path())).unwrap();
1290 assert_eq!(
1291 cfg.cache.override_no_store_domains,
1292 vec!["docs.example.com".to_string(), "cdn.foo.com".to_string()]
1293 );
1294 }
1295
1296 #[test]
1297 fn load_accepts_equal_ttls() {
1298 let mut file = tempfile::NamedTempFile::new().unwrap();
1299 writeln!(
1300 file,
1301 r#"
1302[cache]
1303default_ttl = "1h"
1304min_ttl = "1h"
1305max_ttl = "1h"
1306"#
1307 )
1308 .unwrap();
1309 let cfg = load(Some(file.path())).unwrap();
1310 assert_eq!(cfg.cache.default_ttl, Duration::from_secs(3600));
1311 }
1312
1313 #[test]
1314 fn default_tokenizer_is_o200k() {
1315 let cfg = Config::default();
1316 assert_eq!(cfg.tokenizer.default, crate::tokenizer::Tokenizer::O200k);
1317 }
1318
1319 #[test]
1320 fn default_mcp_intervals() {
1321 let cfg = Config::default();
1322 assert_eq!(cfg.mcp.heartbeat_interval, Duration::from_secs(5));
1323 assert_eq!(cfg.mcp.reap_threshold, Duration::from_secs(60));
1324 }
1325
1326 #[test]
1327 fn load_tokenizer_override() {
1328 let mut file = tempfile::NamedTempFile::new().unwrap();
1329 writeln!(
1330 file,
1331 r#"
1332[tokenizer]
1333default = "claude"
1334"#
1335 )
1336 .unwrap();
1337 let cfg = load(Some(file.path())).unwrap();
1338 assert_eq!(cfg.tokenizer.default, crate::tokenizer::Tokenizer::Claude);
1339 }
1340
1341 #[test]
1342 fn load_unknown_tokenizer_errors() {
1343 let mut file = tempfile::NamedTempFile::new().unwrap();
1344 writeln!(
1345 file,
1346 r#"
1347[tokenizer]
1348default = "gpt-5"
1349"#
1350 )
1351 .unwrap();
1352 let result = load(Some(file.path()));
1353 assert!(matches!(result, Err(ConfigError::Parse { .. })));
1354 }
1355
1356 #[test]
1357 fn load_mcp_overrides() {
1358 let mut file = tempfile::NamedTempFile::new().unwrap();
1359 writeln!(
1360 file,
1361 r#"
1362[mcp]
1363heartbeat_interval = "10s"
1364reap_threshold = "2m"
1365"#
1366 )
1367 .unwrap();
1368 let cfg = load(Some(file.path())).unwrap();
1369 assert_eq!(cfg.mcp.heartbeat_interval, Duration::from_secs(10));
1370 assert_eq!(cfg.mcp.reap_threshold, Duration::from_secs(120));
1371 }
1372
1373 #[test]
1374 fn load_output_dir_override() {
1375 let mut file = tempfile::NamedTempFile::new().unwrap();
1376 writeln!(
1377 file,
1378 r#"
1379[output]
1380dir = "/tmp/rover-out"
1381"#
1382 )
1383 .unwrap();
1384 let cfg = load(Some(file.path())).unwrap();
1385 assert_eq!(
1386 cfg.output.dir.as_deref().unwrap().to_str(),
1387 Some("/tmp/rover-out")
1388 );
1389 }
1390
1391 #[test]
1392 fn load_rejects_zero_heartbeat() {
1393 let mut file = tempfile::NamedTempFile::new().unwrap();
1394 writeln!(
1395 file,
1396 r#"
1397[mcp]
1398heartbeat_interval = "0s"
1399"#
1400 )
1401 .unwrap();
1402 let result = load(Some(file.path()));
1403 assert!(matches!(result, Err(ConfigError::Invalid { .. })));
1404 }
1405
1406 #[test]
1407 fn default_rate_limit_matches_prd() {
1408 let cfg = Config::default();
1409 assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, 60);
1410 assert_eq!(cfg.rate_limit.per_domain_concurrency, 2);
1411 assert_eq!(cfg.rate_limit.global_concurrency, 8);
1412 assert_eq!(cfg.rate_limit.max_retries, 3);
1413 }
1414
1415 #[test]
1416 fn default_robots_matches_prd() {
1417 let cfg = Config::default();
1418 assert!(!cfg.robots.respect);
1421 assert!(cfg.robots.ignore_domains.is_empty());
1422 assert_eq!(cfg.robots.default_ttl, Duration::from_secs(24 * 3600));
1423 assert_eq!(cfg.robots.failure_ttl, Duration::from_secs(300));
1424 }
1425
1426 #[test]
1427 fn load_rate_limit_overrides() {
1428 let mut file = tempfile::NamedTempFile::new().unwrap();
1429 writeln!(
1430 file,
1431 r#"
1432[rate_limit]
1433requests_per_minute_per_domain = 120
1434per_domain_concurrency = 4
1435global_concurrency = 16
1436max_retries = 5
1437initial_backoff = "250ms"
1438max_backoff = "60s"
1439retry_after_ceiling = "10m"
1440jitter_seed = 42
1441"#
1442 )
1443 .unwrap();
1444 let cfg = load(Some(file.path())).unwrap();
1445 assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, 120);
1446 assert_eq!(cfg.rate_limit.max_retries, 5);
1447 assert_eq!(cfg.rate_limit.jitter_seed, Some(42));
1448 }
1449
1450 #[test]
1451 fn load_robots_overrides() {
1452 let mut file = tempfile::NamedTempFile::new().unwrap();
1453 writeln!(
1454 file,
1455 r#"
1456[robots]
1457respect = false
1458ignore_domains = ["FOO.example.com", "bar.example.org"]
1459default_ttl = "12h"
1460failure_ttl = "2m"
1461"#
1462 )
1463 .unwrap();
1464 let cfg = load(Some(file.path())).unwrap();
1465 assert!(!cfg.robots.respect);
1466 assert_eq!(
1467 cfg.robots.ignore_domains,
1468 vec!["foo.example.com".to_string(), "bar.example.org".to_string()]
1469 );
1470 assert_eq!(cfg.robots.default_ttl, Duration::from_secs(12 * 3600));
1471 assert_eq!(cfg.robots.failure_ttl, Duration::from_secs(120));
1472 }
1473
1474 #[test]
1475 fn load_rejects_zero_rpm() {
1476 let mut file = tempfile::NamedTempFile::new().unwrap();
1477 writeln!(
1478 file,
1479 r#"
1480[rate_limit]
1481requests_per_minute_per_domain = 0
1482"#
1483 )
1484 .unwrap();
1485 assert!(matches!(
1486 load(Some(file.path())),
1487 Err(ConfigError::Invalid { .. })
1488 ));
1489 }
1490
1491 #[test]
1492 fn load_rejects_rpm_above_sanity_cap() {
1493 let mut file = tempfile::NamedTempFile::new().unwrap();
1494 writeln!(
1495 file,
1496 r#"
1497[rate_limit]
1498requests_per_minute_per_domain = 100000
1499"#
1500 )
1501 .unwrap();
1502 assert!(matches!(
1503 load(Some(file.path())),
1504 Err(ConfigError::Invalid { .. })
1505 ));
1506 }
1507
1508 #[test]
1509 fn load_rejects_max_retries_above_10() {
1510 let mut file = tempfile::NamedTempFile::new().unwrap();
1511 writeln!(
1512 file,
1513 r#"
1514[rate_limit]
1515max_retries = 11
1516"#
1517 )
1518 .unwrap();
1519 assert!(matches!(
1520 load(Some(file.path())),
1521 Err(ConfigError::Invalid { .. })
1522 ));
1523 }
1524
1525 #[test]
1526 fn load_rejects_backoff_inversion() {
1527 let mut file = tempfile::NamedTempFile::new().unwrap();
1528 writeln!(
1529 file,
1530 r#"
1531[rate_limit]
1532initial_backoff = "10s"
1533max_backoff = "5s"
1534"#
1535 )
1536 .unwrap();
1537 assert!(matches!(
1538 load(Some(file.path())),
1539 Err(ConfigError::Invalid { .. })
1540 ));
1541 }
1542
1543 #[test]
1544 fn load_rejects_failure_ttl_above_default_ttl() {
1545 let mut file = tempfile::NamedTempFile::new().unwrap();
1546 writeln!(
1547 file,
1548 r#"
1549[robots]
1550default_ttl = "1m"
1551failure_ttl = "10m"
1552"#
1553 )
1554 .unwrap();
1555 assert!(matches!(
1556 load(Some(file.path())),
1557 Err(ConfigError::Invalid { .. })
1558 ));
1559 }
1560
1561 #[test]
1562 fn summarization_section_parses_with_defaults() {
1563 let toml = r#"
1564[summarization]
1565"#;
1566 let cfg: Config = toml::from_str(toml).unwrap();
1567 assert_eq!(cfg.summarization.default_backend, "default");
1568 assert_eq!(cfg.summarization.default_mode, "abstractive");
1569 assert_eq!(cfg.summarization.default_style, "prose");
1570 assert!(cfg.summarization.fallback_to_extractive);
1571 assert_eq!(cfg.summarization.tables.target_tokens, 150);
1572 assert!(cfg.summarization.tables.focus.contains("Describe"));
1573 }
1574
1575 #[test]
1576 fn summarization_tables_block_overrides_defaults() {
1577 let toml = r#"
1578[summarization.tables]
1579target_tokens = 250
1580focus = "Custom table focus prompt."
1581"#;
1582 let cfg: Config = toml::from_str(toml).unwrap();
1583 assert_eq!(cfg.summarization.tables.target_tokens, 250);
1584 assert_eq!(cfg.summarization.tables.focus, "Custom table focus prompt.");
1585 assert_eq!(cfg.summarization.default_backend, "default");
1587 }
1588
1589 #[test]
1590 fn backends_section_parses_extractive_block() {
1591 let toml = r#"
1592[backends.default]
1593kind = "extractive"
1594"#;
1595 let cfg: Config = toml::from_str(toml).unwrap();
1596 assert_eq!(cfg.backends.len(), 1);
1597 let b = cfg.backends.get("default").unwrap();
1598 assert_eq!(b.kind, "extractive");
1599 assert!(b.provider.is_none());
1600 }
1601
1602 #[test]
1603 fn backends_section_parses_cloud_block_with_all_fields() {
1604 let toml = r#"
1605[backends.lm_studio]
1606kind = "cloud"
1607provider = "openai_compat"
1608base_url = "http://localhost:1234/v1"
1609model = "qwen3.5-0.8b"
1610api_key_env = "LM_KEY"
1611"#;
1612 let cfg: Config = toml::from_str(toml).unwrap();
1613 let b = cfg.backends.get("lm_studio").unwrap();
1614 assert_eq!(b.kind, "cloud");
1615 assert_eq!(b.provider.as_deref(), Some("openai_compat"));
1616 assert_eq!(b.base_url.as_deref(), Some("http://localhost:1234/v1"));
1617 assert_eq!(b.model.as_deref(), Some("qwen3.5-0.8b"));
1618 assert_eq!(b.api_key_env.as_deref(), Some("LM_KEY"));
1619 }
1620
1621 #[test]
1622 fn missing_summarization_section_yields_defaults() {
1623 let cfg: Config = toml::from_str("").unwrap();
1624 assert_eq!(cfg.summarization.default_backend, "default");
1625 assert!(cfg.backends.is_empty());
1626 }
1627
1628 #[test]
1629 fn ssrf_section_parses_with_defaults() {
1630 let toml = r#"
1631[ssrf]
1632"#;
1633 let cfg: Config = toml::from_str(toml).unwrap();
1634 assert_eq!(cfg.ssrf.level, "strict");
1635 assert_eq!(cfg.ssrf.project_root, std::path::PathBuf::from("."));
1636 }
1637
1638 #[test]
1639 fn ssrf_section_accepts_each_level() {
1640 for level in &["strict", "loopback", "project", "lan", "none"] {
1641 let toml = format!("[ssrf]\nlevel = \"{level}\"\n");
1642 let cfg: Config = toml::from_str(&toml).unwrap();
1643 assert_eq!(cfg.ssrf.level, *level);
1644 }
1645 }
1646
1647 #[test]
1648 fn ssrf_section_rejects_unknown_field() {
1649 let toml = r#"
1650[ssrf]
1651level = "strict"
1652bogus = 1
1653"#;
1654 let r: Result<Config, _> = toml::from_str(toml);
1655 assert!(r.is_err(), "expected deny_unknown_fields rejection");
1656 }
1657
1658 #[test]
1659 fn missing_ssrf_section_yields_defaults() {
1660 let cfg: Config = toml::from_str("").unwrap();
1661 assert_eq!(cfg.ssrf.level, "strict");
1662 }
1663
1664 #[test]
1665 fn debug_section_parses_with_defaults() {
1666 let cfg: Config = toml::from_str("[debug]\n").unwrap();
1667 assert_eq!(cfg.debug.har_path, "");
1668 assert_eq!(cfg.debug.har_body_cap, 64 * 1024);
1669 assert_eq!(cfg.debug.log_level, "info");
1670 }
1671
1672 #[test]
1673 fn debug_section_har_body_cap_accepts_humansize() {
1674 let cfg: Config = toml::from_str(
1675 r#"[debug]
1676har_body_cap = "1MiB"
1677"#,
1678 )
1679 .unwrap();
1680 assert_eq!(cfg.debug.har_body_cap, 1024 * 1024);
1681 }
1682
1683 #[test]
1684 fn debug_section_har_body_cap_accepts_integer_bytes() {
1685 let cfg: Config = toml::from_str(
1686 r#"[debug]
1687har_body_cap = 8192
1688"#,
1689 )
1690 .unwrap();
1691 assert_eq!(cfg.debug.har_body_cap, 8192);
1692 }
1693
1694 #[test]
1695 fn debug_section_rejects_unknown_field() {
1696 let r: Result<Config, _> = toml::from_str(
1697 r#"[debug]
1698har_path = ""
1699bogus = 1
1700"#,
1701 );
1702 assert!(r.is_err());
1703 }
1704
1705 #[test]
1706 fn image_captions_defaults_match_spec() {
1707 let c = ImageCaptionsConfig::default();
1708 assert_eq!(c.max_tokens, 50);
1709 assert_eq!(c.max_per_page, 10);
1710 assert_eq!(c.min_width, 200);
1711 assert_eq!(c.min_height, 200);
1712 assert_eq!(c.max_bytes, 10 * 1024 * 1024);
1713 assert_eq!(c.max_concurrent, 2);
1714 }
1715
1716 #[test]
1717 fn human_bytes_parses_common_forms() {
1718 assert_eq!(parse_human_bytes("1024").unwrap(), 1024);
1719 assert_eq!(parse_human_bytes("10MiB").unwrap(), 10 * 1024 * 1024);
1720 assert_eq!(parse_human_bytes("10MB").unwrap(), 10_000_000);
1721 assert_eq!(
1722 parse_human_bytes("1.5GiB").unwrap(),
1723 (1.5_f64 * 1024.0 * 1024.0 * 1024.0) as u64
1724 );
1725 assert!(parse_human_bytes("bogus").is_err());
1726 }
1727
1728 #[test]
1729 fn image_captions_deserializes_from_toml() {
1730 let toml_str = r#"
1731[image_captions]
1732default = "openai"
1733max_per_page = 5
1734min_width = 100
1735min_height = 100
1736max_bytes = "1MiB"
1737"#;
1738 let cfg: Config = toml::from_str(toml_str).unwrap();
1739 assert_eq!(cfg.image_captions.default.as_deref(), Some("openai"));
1740 assert_eq!(cfg.image_captions.max_per_page, 5);
1741 assert_eq!(cfg.image_captions.max_bytes, 1024 * 1024);
1742 assert_eq!(cfg.image_captions.max_tokens, 50);
1743 }
1744
1745 #[test]
1746 fn captioners_block_round_trips() {
1747 let toml_str = r#"
1748[captioners.openai]
1749kind = "cloud"
1750provider = "openai"
1751model = "gpt-4o-mini"
1752api_key_env = "OPENAI_API_KEY"
1753
1754[captioners.local]
1755kind = "local"
1756model = "HuggingFaceTB/SmolVLM-256M-Instruct"
1757"#;
1758 let cfg: Config = toml::from_str(toml_str).unwrap();
1759 assert_eq!(cfg.captioners.len(), 2);
1760 assert_eq!(
1761 cfg.captioners.get("openai").unwrap().provider.as_deref(),
1762 Some("openai")
1763 );
1764 assert_eq!(cfg.captioners.get("local").unwrap().kind, "local");
1765 }
1766
1767 #[test]
1768 fn headless_m9_keys_default_correctly() {
1769 let h = HeadlessConfig::default();
1770 assert_eq!(h.max_concurrent, 4);
1771 assert!(h.chrome_executable.is_empty());
1772 }
1773
1774 #[test]
1775 fn prompt_injection_defaults_when_absent() {
1776 let cfg: Config = toml::from_str("").unwrap();
1777 assert_eq!(cfg.prompt_injection.level, "moderate");
1778 assert_eq!(cfg.prompt_injection.model, "disabled");
1779 assert!((cfg.prompt_injection.model_threshold - 0.9).abs() < f64::EPSILON);
1780 assert!(cfg.prompt_injection.allowlist.wrap.is_empty());
1781 assert!(cfg.prompt_injection.allowlist.patterns.is_empty());
1782 assert!(cfg.prompt_injection.allowlist.model.is_empty());
1783 assert!(!cfg.prompt_injection.agent_overrides.wrap);
1784 assert!(!cfg.prompt_injection.agent_overrides.patterns);
1785 assert!(!cfg.prompt_injection.agent_overrides.model);
1786 assert!(!cfg.prompt_injection.agent_overrides.level);
1787 }
1788
1789 #[test]
1790 fn prompt_injection_parses_full_block() {
1791 let toml = r#"
1792[prompt_injection]
1793level = "strict"
1794model = "deberta-base"
1795model_threshold = 0.75
1796
1797[prompt_injection.allowlist]
1798wrap = ["https://*.internal.example.com/*"]
1799patterns = ["*"]
1800model = []
1801
1802[prompt_injection.agent_overrides]
1803wrap = true
1804patterns = false
1805model = true
1806level = true
1807"#;
1808 let cfg: Config = toml::from_str(toml).unwrap();
1809 assert_eq!(cfg.prompt_injection.level, "strict");
1810 assert_eq!(cfg.prompt_injection.model, "deberta-base");
1811 assert!((cfg.prompt_injection.model_threshold - 0.75).abs() < f64::EPSILON);
1812 assert_eq!(
1813 cfg.prompt_injection.allowlist.wrap,
1814 vec!["https://*.internal.example.com/*".to_string()]
1815 );
1816 assert_eq!(
1817 cfg.prompt_injection.allowlist.patterns,
1818 vec!["*".to_string()]
1819 );
1820 assert!(cfg.prompt_injection.agent_overrides.wrap);
1821 assert!(!cfg.prompt_injection.agent_overrides.patterns);
1822 assert!(cfg.prompt_injection.agent_overrides.model);
1823 assert!(cfg.prompt_injection.agent_overrides.level);
1824 }
1825
1826 #[test]
1827 fn prompt_injection_rejects_unknown_field() {
1828 let toml = "[prompt_injection]\nbogus = 1\n";
1829 let r: Result<Config, _> = toml::from_str(toml);
1830 assert!(r.is_err(), "expected deny_unknown_fields rejection");
1831 }
1832
1833 #[test]
1834 fn config_candidates_prefers_rover_config_env_as_sole_candidate() {
1835 let c = config_candidates_from(Some("/custom/x.toml"), Some(Path::new("/cfg")));
1836 assert_eq!(c, vec![std::path::PathBuf::from("/custom/x.toml")]);
1837 }
1838
1839 #[test]
1840 fn config_candidates_searches_platform_then_cwd() {
1841 let c = config_candidates_from(None, Some(Path::new("/cfg")));
1842 assert_eq!(
1843 c,
1844 vec![
1845 std::path::PathBuf::from("/cfg/rover/rover.toml"),
1846 std::path::PathBuf::from("rover.toml"),
1847 ]
1848 );
1849 }
1850
1851 #[test]
1852 fn config_candidates_falls_back_to_cwd_rover_toml() {
1853 let c = config_candidates_from(None, None);
1854 assert_eq!(c, vec![std::path::PathBuf::from("rover.toml")]);
1855 }
1856
1857 #[test]
1858 fn resolve_existing_prefers_platform_over_cwd_candidate() {
1859 let tmp = tempfile::tempdir().unwrap();
1861 let rover_dir = tmp.path().join("rover");
1862 std::fs::create_dir_all(&rover_dir).unwrap();
1863 let platform_file = rover_dir.join("rover.toml");
1864 std::fs::write(&platform_file, "[fetch]\ntimeout_secs = 3\n").unwrap();
1865
1866 let resolved = config_candidates_from(None, Some(tmp.path()))
1867 .into_iter()
1868 .find(|p| p.is_file());
1869 assert_eq!(resolved, Some(platform_file));
1870 }
1871
1872 #[test]
1873 fn resolve_existing_is_none_when_no_candidate_exists() {
1874 let tmp = tempfile::tempdir().unwrap();
1875 let resolved = config_candidates_from(None, Some(tmp.path()))
1877 .into_iter()
1878 .find(|p| p.is_file());
1879 assert_eq!(resolved, None);
1880 }
1881
1882 #[test]
1883 fn load_resolved_uses_explicit_path_when_present() {
1884 let mut file = tempfile::NamedTempFile::new().unwrap();
1885 writeln!(file, "[fetch]\ntimeout_secs = 7\n").unwrap();
1886 let cfg = load_resolved_from(Some(file.path()), None).unwrap();
1888 assert_eq!(cfg.fetch.timeout_secs, 7);
1889 }
1890
1891 #[test]
1892 fn load_resolved_errors_when_explicit_path_missing() {
1893 let mut default_file = tempfile::NamedTempFile::new().unwrap();
1896 writeln!(default_file, "[fetch]\ntimeout_secs = 9\n").unwrap();
1897 let result = load_resolved_from(
1898 Some(Path::new("/no/such/__rover_explicit__.toml")),
1899 Some(default_file.path()),
1900 );
1901 assert!(matches!(result, Err(ConfigError::Read { .. })));
1902 }
1903
1904 #[test]
1905 fn load_resolved_loads_resolved_default_when_no_explicit() {
1906 let mut file = tempfile::NamedTempFile::new().unwrap();
1907 writeln!(file, "[fetch]\ntimeout_secs = 11\n").unwrap();
1908 let cfg = load_resolved_from(None, Some(file.path())).unwrap();
1909 assert_eq!(cfg.fetch.timeout_secs, 11);
1910 }
1911
1912 #[test]
1913 fn load_resolved_falls_back_to_defaults_when_nothing_resolves() {
1914 let cfg = load_resolved_from(None, None).unwrap();
1915 assert_eq!(cfg.fetch.timeout_secs, default_timeout_secs());
1916 }
1917}