1pub mod edit;
7pub mod provenance;
8
9use serde::{Deserialize, Serialize};
10use std::path::{Path, PathBuf};
11use std::time::Duration;
12use thiserror::Error;
13
/// Errors produced while loading a configuration file.
///
/// Every variant stores the offending `path` as a display string so the
/// rendered message names the file involved.
#[derive(Debug, Error)]
pub enum ConfigError {
    /// The file could not be read; carries the underlying I/O error.
    #[error("failed to read config at {path}: {source}")]
    Read {
        path: String,
        source: std::io::Error,
    },

    /// The file contents were rejected by the TOML deserializer.
    #[error("failed to parse config at {path}: {source}")]
    Parse {
        path: String,
        source: toml::de::Error,
    },

    /// The file parsed, but a value failed a validation rule; `message`
    /// names the rule that was violated.
    #[error("invalid config at {path}: {message}")]
    Invalid { path: String, message: String },
}
31
/// Root of the configuration file.
///
/// Every field is marked `#[serde(default)]`, so any section may be omitted
/// from the file. `deny_unknown_fields` turns an unrecognised top-level key
/// into a deserialization error.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct Config {
    /// `fetch` section: user agent and timeout.
    #[serde(default)]
    pub fetch: FetchConfig,

    /// `ssrf` section: level and project root.
    #[serde(default)]
    pub ssrf: SsrfConfig,

    /// `debug` section: HAR path, HAR body cap, log level.
    #[serde(default)]
    pub debug: DebugConfig,

    /// `cache` section: TTL bounds and no-store overrides.
    #[serde(default)]
    pub cache: CacheConfig,

    /// `tokenizer` section: the default tokenizer.
    #[serde(default)]
    pub tokenizer: TokenizerConfig,

    /// `mcp` section: heartbeat interval and reap threshold.
    #[serde(default)]
    pub mcp: McpConfig,

    /// `output` section: optional output directory.
    #[serde(default)]
    pub output: OutputConfig,

    /// `rate_limit` section: per-domain rate, concurrency and retry/backoff.
    #[serde(default)]
    pub rate_limit: RateLimitConfig,

    /// `robots` section.
    #[serde(default)]
    pub robots: RobotsConfig,

    /// `summarization` section.
    #[serde(default)]
    pub summarization: SummarizationConfig,

    /// `backends` tables, keyed by name. Empty when the section is absent.
    #[serde(default)]
    pub backends: std::collections::HashMap<String, BackendConfig>,

    /// `headless` section.
    #[serde(default)]
    pub headless: HeadlessConfig,

    /// `image_captions` section.
    #[serde(default)]
    pub image_captions: ImageCaptionsConfig,

    /// `captioners` tables, keyed by name. Stored in a `BTreeMap`, so
    /// iteration follows key order.
    #[serde(default)]
    pub captioners: std::collections::BTreeMap<String, CaptionerConfig>,

    /// `prompt_injection` section.
    #[serde(default)]
    pub prompt_injection: PromptInjectionConfig,
}
80
/// Settings stored in the `fetch` field of [`Config`].
///
/// Unknown keys inside the section are rejected.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct FetchConfig {
    /// User-agent string; falls back to `default_user_agent()` when absent.
    #[serde(default = "default_user_agent")]
    pub user_agent: String,

    /// Timeout in whole seconds; falls back to `default_timeout_secs()`.
    /// `validate` rejects `0` for configs loaded from a file.
    #[serde(default = "default_timeout_secs")]
    pub timeout_secs: u64,
}
91
92impl Default for FetchConfig {
93 fn default() -> Self {
94 Self {
95 user_agent: default_user_agent(),
96 timeout_secs: default_timeout_secs(),
97 }
98 }
99}
100
101impl FetchConfig {
102 pub fn timeout(&self) -> Duration {
103 Duration::from_secs(self.timeout_secs)
104 }
105}
106
107impl Config {
108 pub fn apply_overrides(
115 &mut self,
116 rate_limit_rpm: Option<u32>,
117 per_host_concurrency: Option<u32>,
118 global_concurrency: Option<u32>,
119 max_retries: Option<u8>,
120 ignore_robots: bool,
121 ) {
122 if let Some(v) = rate_limit_rpm {
123 self.rate_limit.requests_per_minute_per_domain = v;
124 }
125 if let Some(v) = per_host_concurrency {
126 self.rate_limit.per_domain_concurrency = v.max(1);
127 }
128 if let Some(v) = global_concurrency {
129 self.rate_limit.global_concurrency = v.max(1);
130 }
131 if let Some(v) = max_retries {
132 self.rate_limit.max_retries = v;
133 }
134 if ignore_robots {
135 self.robots.respect = false;
136 }
137 }
138
139 #[cfg(any(test, feature = "test-loopback"))]
142 pub fn with_ssrf_level(mut self, level: &str) -> Self {
143 self.ssrf.level = level.to_string();
144 self
145 }
146}
147
148fn default_user_agent() -> String {
149 format!(
150 "Rover/{} (+https://github.com/aaronbassett/rover)",
151 env!("CARGO_PKG_VERSION")
152 )
153}
154
/// Default for `FetchConfig::timeout_secs`: 15 seconds.
fn default_timeout_secs() -> u64 {
    const DEFAULT_TIMEOUT_SECS: u64 = 15;
    DEFAULT_TIMEOUT_SECS
}
158
/// Settings stored in the `cache` field of [`Config`].
///
/// Duration fields are (de)serialized through `humantime_serde`, so the file
/// uses strings such as `"30m"` or `"1d"`. `validate` requires
/// `min_ttl <= default_ttl <= max_ttl` for configs loaded from a file.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct CacheConfig {
    /// Default TTL; falls back to `default_cache_default_ttl()`.
    #[serde(default = "default_cache_default_ttl", with = "humantime_serde")]
    pub default_ttl: Duration,

    /// Lower TTL bound; falls back to `default_cache_min_ttl()`.
    #[serde(default = "default_cache_min_ttl", with = "humantime_serde")]
    pub min_ttl: Duration,

    /// Upper TTL bound; falls back to `default_cache_max_ttl()`.
    #[serde(default = "default_cache_max_ttl", with = "humantime_serde")]
    pub max_ttl: Duration,

    /// Stale-while-revalidate window; falls back to
    /// `default_cache_swr_window()`.
    #[serde(default = "default_cache_swr_window", with = "humantime_serde")]
    pub stale_while_revalidate_window: Duration,

    /// Defaults to `false`.
    /// NOTE(review): presumably overrides `no-store` responses globally —
    /// confirm against the cache layer.
    #[serde(default)]
    pub override_no_store: bool,

    /// Domain list, empty by default. `validate` lowercases each entry
    /// (ASCII only) when a config is loaded from a file.
    #[serde(default)]
    pub override_no_store_domains: Vec<String>,

    /// Defaults to `false`.
    #[serde(default)]
    pub store_raw_html: bool,
}
193
194impl Default for CacheConfig {
195 fn default() -> Self {
196 Self {
197 default_ttl: default_cache_default_ttl(),
198 min_ttl: default_cache_min_ttl(),
199 max_ttl: default_cache_max_ttl(),
200 stale_while_revalidate_window: default_cache_swr_window(),
201 override_no_store: false,
202 override_no_store_domains: vec![],
203 store_raw_html: false,
204 }
205 }
206}
207
/// Default for `CacheConfig::default_ttl`: 15 minutes.
fn default_cache_default_ttl() -> Duration {
    Duration::from_secs(900)
}

/// Default for `CacheConfig::min_ttl`: 5 minutes.
fn default_cache_min_ttl() -> Duration {
    Duration::from_secs(5 * 60)
}

/// Default for `CacheConfig::max_ttl`: 7 days.
fn default_cache_max_ttl() -> Duration {
    Duration::from_secs(7 * 24 * 60 * 60)
}

/// Default for `CacheConfig::stale_while_revalidate_window`: 5 minutes.
fn default_cache_swr_window() -> Duration {
    Duration::from_secs(300)
}
227
/// Settings stored in the `tokenizer` field of [`Config`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct TokenizerConfig {
    /// Tokenizer selected when the key is absent; falls back to
    /// `default_tokenizer()`.
    #[serde(default = "default_tokenizer")]
    pub default: crate::tokenizer::Tokenizer,
}
236
237impl Default for TokenizerConfig {
238 fn default() -> Self {
239 Self {
240 default: default_tokenizer(),
241 }
242 }
243}
244
245fn default_tokenizer() -> crate::tokenizer::Tokenizer {
246 crate::tokenizer::Tokenizer::O200k
247}
248
/// Settings stored in the `mcp` field of [`Config`].
///
/// Both durations are (de)serialized through `humantime_serde` (e.g. `"10s"`,
/// `"2m"`). `validate` rejects a zero value for either field when a config is
/// loaded from a file.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct McpConfig {
    /// Heartbeat interval; falls back to `default_heartbeat_interval()`.
    #[serde(default = "default_heartbeat_interval", with = "humantime_serde")]
    pub heartbeat_interval: Duration,

    /// Reap threshold; falls back to `default_reap_threshold()`.
    #[serde(default = "default_reap_threshold", with = "humantime_serde")]
    pub reap_threshold: Duration,
}
260
261impl Default for McpConfig {
262 fn default() -> Self {
263 Self {
264 heartbeat_interval: default_heartbeat_interval(),
265 reap_threshold: default_reap_threshold(),
266 }
267 }
268}
269
/// Default for `McpConfig::heartbeat_interval`: 5 seconds.
fn default_heartbeat_interval() -> Duration {
    Duration::new(5, 0)
}

/// Default for `McpConfig::reap_threshold`: 60 seconds.
fn default_reap_threshold() -> Duration {
    Duration::new(60, 0)
}
277
/// Settings stored in the `output` field of [`Config`].
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct OutputConfig {
    /// Optional output directory; `None` when the key is absent.
    #[serde(default)]
    pub dir: Option<std::path::PathBuf>,
}
287
/// Settings stored in the `rate_limit` field of [`Config`].
///
/// Duration fields are (de)serialized through `humantime_serde`. The bounds
/// mentioned on individual fields are enforced by `validate` when a config is
/// loaded from a file.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct RateLimitConfig {
    /// Requests per minute per domain; `validate` requires `1..=6000`.
    #[serde(default = "default_rpm_per_domain")]
    pub requests_per_minute_per_domain: u32,

    /// Per-domain concurrency; `validate` requires a non-zero value.
    #[serde(default = "default_per_domain_concurrency")]
    pub per_domain_concurrency: u32,

    /// Global concurrency; `validate` requires a non-zero value.
    #[serde(default = "default_global_concurrency")]
    pub global_concurrency: u32,

    /// Maximum retries; `validate` rejects values above 10.
    #[serde(default = "default_max_retries")]
    pub max_retries: u8,

    /// Initial backoff; `validate` requires it to be `<= max_backoff`.
    #[serde(default = "default_initial_backoff", with = "humantime_serde")]
    pub initial_backoff: Duration,

    /// Maximum backoff.
    #[serde(default = "default_max_backoff", with = "humantime_serde")]
    pub max_backoff: Duration,

    /// Retry-after ceiling; `validate` requires a non-zero duration.
    #[serde(default = "default_retry_after_ceiling", with = "humantime_serde")]
    pub retry_after_ceiling: Duration,

    /// Optional seed, `None` by default.
    /// NOTE(review): presumably makes retry jitter deterministic — confirm
    /// against the retry implementation.
    #[serde(default)]
    pub jitter_seed: Option<u64>,

    /// Threshold in seconds; falls back to
    /// `default_deferred_threshold_secs()`. Not checked by `validate`.
    #[serde(default = "default_deferred_threshold_secs")]
    pub deferred_retry_threshold_secs: u64,
}
325
326impl Default for RateLimitConfig {
327 fn default() -> Self {
328 Self {
329 requests_per_minute_per_domain: default_rpm_per_domain(),
330 per_domain_concurrency: default_per_domain_concurrency(),
331 global_concurrency: default_global_concurrency(),
332 max_retries: default_max_retries(),
333 initial_backoff: default_initial_backoff(),
334 max_backoff: default_max_backoff(),
335 retry_after_ceiling: default_retry_after_ceiling(),
336 jitter_seed: None,
337 deferred_retry_threshold_secs: default_deferred_threshold_secs(),
338 }
339 }
340}
341
/// Default for `RateLimitConfig::requests_per_minute_per_domain`: 60.
fn default_rpm_per_domain() -> u32 {
    60_u32
}

/// Default for `RateLimitConfig::per_domain_concurrency`: 2.
fn default_per_domain_concurrency() -> u32 {
    2_u32
}

/// Default for `RateLimitConfig::global_concurrency`: 8.
fn default_global_concurrency() -> u32 {
    8_u32
}

/// Default for `RateLimitConfig::max_retries`: 3.
fn default_max_retries() -> u8 {
    3_u8
}

/// Default for `RateLimitConfig::initial_backoff`: 500 milliseconds.
fn default_initial_backoff() -> Duration {
    Duration::new(0, 500_000_000)
}

/// Default for `RateLimitConfig::max_backoff`: 30 seconds.
fn default_max_backoff() -> Duration {
    Duration::from_millis(30_000)
}

/// Default for `RateLimitConfig::retry_after_ceiling`: 5 minutes.
fn default_retry_after_ceiling() -> Duration {
    Duration::from_secs(5 * 60)
}

/// Default for `RateLimitConfig::deferred_retry_threshold_secs`: 30.
fn default_deferred_threshold_secs() -> u64 {
    30_u64
}
366
/// Settings stored in the `robots` field of [`Config`].
///
/// Duration fields are (de)serialized through `humantime_serde`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct RobotsConfig {
    /// Falls back to `default_respect()` when absent.
    /// `Config::apply_overrides` can force this to `false`.
    #[serde(default = "default_respect")]
    pub respect: bool,

    /// Domain list, empty by default. `validate` lowercases each entry
    /// (ASCII only) when a config is loaded from a file.
    #[serde(default)]
    pub ignore_domains: Vec<String>,

    /// Falls back to `default_robots_ttl()`.
    #[serde(default = "default_robots_ttl", with = "humantime_serde")]
    pub default_ttl: Duration,

    /// Falls back to `default_robots_failure_ttl()`. `validate` requires it
    /// to be `<= default_ttl`.
    #[serde(default = "default_robots_failure_ttl", with = "humantime_serde")]
    pub failure_ttl: Duration,
}
388
389impl Default for RobotsConfig {
390 fn default() -> Self {
391 Self {
392 respect: default_respect(),
393 ignore_domains: Vec::new(),
394 default_ttl: default_robots_ttl(),
395 failure_ttl: default_robots_failure_ttl(),
396 }
397 }
398}
399
/// Default for `RobotsConfig::respect`: `false`.
///
/// NOTE(review): a helper returning `false` behaves the same as a plain
/// `#[serde(default)]` — confirm that `false` is the intended default.
fn default_respect() -> bool {
    const RESPECT_BY_DEFAULT: bool = false;
    RESPECT_BY_DEFAULT
}

/// Default for `RobotsConfig::default_ttl`: 24 hours.
fn default_robots_ttl() -> Duration {
    Duration::from_secs(86_400)
}

/// Default for `RobotsConfig::failure_ttl`: 5 minutes.
fn default_robots_failure_ttl() -> Duration {
    Duration::from_secs(300)
}
413
/// Settings stored in the `summarization` field of [`Config`].
///
/// The string-valued fields are not checked by `validate`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct SummarizationConfig {
    /// Falls back to `default_summarization_backend()`.
    /// NOTE(review): presumably a key into `Config::backends` — confirm.
    #[serde(default = "default_summarization_backend")]
    pub default_backend: String,

    /// Falls back to `default_summarization_mode()`.
    #[serde(default = "default_summarization_mode")]
    pub default_mode: String,

    /// Falls back to `default_summarization_style()`.
    #[serde(default = "default_summarization_style")]
    pub default_style: String,

    /// Falls back to `default_summarization_fallback()`.
    #[serde(default = "default_summarization_fallback")]
    pub fallback_to_extractive: bool,

    /// Nested `tables` settings; uses `TablesSummarizationConfig::default()`
    /// when absent.
    #[serde(default)]
    pub tables: TablesSummarizationConfig,
}
436
437impl Default for SummarizationConfig {
438 fn default() -> Self {
439 Self {
440 default_backend: default_summarization_backend(),
441 default_mode: default_summarization_mode(),
442 default_style: default_summarization_style(),
443 fallback_to_extractive: default_summarization_fallback(),
444 tables: TablesSummarizationConfig::default(),
445 }
446 }
447}
448
/// Default for `SummarizationConfig::default_backend`: `"default"`.
fn default_summarization_backend() -> String {
    String::from("default")
}

/// Default for `SummarizationConfig::default_mode`: `"abstractive"`.
fn default_summarization_mode() -> String {
    String::from("abstractive")
}

/// Default for `SummarizationConfig::default_style`: `"prose"`.
fn default_summarization_style() -> String {
    String::from("prose")
}

/// Default for `SummarizationConfig::fallback_to_extractive`: `true`.
fn default_summarization_fallback() -> bool {
    const FALLBACK_BY_DEFAULT: bool = true;
    FALLBACK_BY_DEFAULT
}
461
/// Settings stored in the `tables` field of [`SummarizationConfig`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct TablesSummarizationConfig {
    /// Falls back to `default_tables_target_tokens()`.
    #[serde(default = "default_tables_target_tokens")]
    pub target_tokens: usize,
    /// Free-form text; falls back to `default_tables_focus()`.
    #[serde(default = "default_tables_focus")]
    pub focus: String,
}
472
473impl Default for TablesSummarizationConfig {
474 fn default() -> Self {
475 Self {
476 target_tokens: default_tables_target_tokens(),
477 focus: default_tables_focus(),
478 }
479 }
480}
481
/// Default for `TablesSummarizationConfig::target_tokens`: 150.
fn default_tables_target_tokens() -> usize {
    150_usize
}

/// Default for `TablesSummarizationConfig::focus`.
fn default_tables_focus() -> String {
    String::from("Describe what this table shows. Highlight any extreme values or notable rows.")
}
488
/// One entry of the `backends` map in [`Config`].
///
/// `kind` carries no `#[serde(default)]`, so it must be present in every
/// entry; the remaining keys are optional and default to `None`.
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
#[serde(deny_unknown_fields)]
pub struct BackendConfig {
    /// Required backend kind. Its accepted values are not checked here.
    pub kind: String,
    /// Optional provider name.
    #[serde(default)]
    pub provider: Option<String>,
    /// Optional model name.
    #[serde(default)]
    pub model: Option<String>,
    /// Optional base URL.
    #[serde(default)]
    pub base_url: Option<String>,
    /// Optional string.
    /// NOTE(review): presumably the name of an environment variable holding
    /// the API key, not the key itself — confirm.
    #[serde(default)]
    pub api_key_env: Option<String>,
}
505
/// Settings stored in the `headless` field of [`Config`].
///
/// None of these fields are checked by `validate`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct HeadlessConfig {
    /// Falls back to `default_headless_max_concurrent()`.
    #[serde(default = "default_headless_max_concurrent")]
    pub max_concurrent: usize,

    /// Empty string when absent.
    /// NOTE(review): presumably an empty value means "auto-detect" — confirm.
    #[serde(default)]
    pub chrome_executable: String,

    /// Falls back to `default_block_images()`.
    #[serde(default = "default_block_images")]
    pub block_images: bool,

    /// Falls back to `default_block_fonts()`.
    #[serde(default = "default_block_fonts")]
    pub block_fonts: bool,

    /// Falls back to `default_block_media()`.
    #[serde(default = "default_block_media")]
    pub block_media: bool,

    /// Defaults to `false`, unlike the other `block_*` flags.
    #[serde(default)]
    pub block_css: bool,

    /// Falls back to `default_block_third_party()`.
    #[serde(default = "default_block_third_party")]
    pub block_third_party: bool,

    /// Falls back to `default_block_service_workers()`.
    #[serde(default = "default_block_service_workers")]
    pub block_service_workers: bool,

    /// Falls back to `default_headless_wait()`. Stored as a free-form string.
    #[serde(default = "default_headless_wait")]
    pub default_wait: String,

    /// Timeout in whole seconds; see [`HeadlessConfig::timeout`].
    #[serde(default = "default_headless_timeout_secs")]
    pub timeout_secs: u64,

    /// Falls back to `default_auto_detect_spa()`.
    #[serde(default = "default_auto_detect_spa")]
    pub auto_detect_spa: bool,

    /// Launch delay in whole seconds; see [`HeadlessConfig::launch_delay`].
    #[serde(default = "default_headless_launch_delay_secs")]
    pub launch_delay_secs: u64,
}
565
566impl HeadlessConfig {
567 pub fn timeout(&self) -> std::time::Duration {
569 std::time::Duration::from_secs(self.timeout_secs)
570 }
571
572 pub fn launch_delay(&self) -> std::time::Duration {
574 std::time::Duration::from_secs(self.launch_delay_secs)
575 }
576}
577
578impl Default for HeadlessConfig {
579 fn default() -> Self {
580 Self {
581 max_concurrent: default_headless_max_concurrent(),
582 chrome_executable: String::new(),
583 block_images: default_block_images(),
584 block_fonts: default_block_fonts(),
585 block_media: default_block_media(),
586 block_css: false,
587 block_third_party: default_block_third_party(),
588 block_service_workers: default_block_service_workers(),
589 default_wait: default_headless_wait(),
590 timeout_secs: default_headless_timeout_secs(),
591 auto_detect_spa: default_auto_detect_spa(),
592 launch_delay_secs: default_headless_launch_delay_secs(),
593 }
594 }
595}
596
/// Default for `HeadlessConfig::max_concurrent`: 4.
fn default_headless_max_concurrent() -> usize {
    4_usize
}

/// Default for `HeadlessConfig::default_wait`: `"domcontentloaded"`.
fn default_headless_wait() -> String {
    String::from("domcontentloaded")
}

/// Default for `HeadlessConfig::timeout_secs`: 15.
fn default_headless_timeout_secs() -> u64 {
    15_u64
}

/// Default for `HeadlessConfig::launch_delay_secs`: 2.
fn default_headless_launch_delay_secs() -> u64 {
    2_u64
}

/// Default for `HeadlessConfig::auto_detect_spa`: on.
fn default_auto_detect_spa() -> bool {
    const ENABLED: bool = true;
    ENABLED
}

/// Default for `HeadlessConfig::block_images`: on.
fn default_block_images() -> bool {
    const ENABLED: bool = true;
    ENABLED
}

/// Default for `HeadlessConfig::block_fonts`: on.
fn default_block_fonts() -> bool {
    const ENABLED: bool = true;
    ENABLED
}

/// Default for `HeadlessConfig::block_media`: on.
fn default_block_media() -> bool {
    const ENABLED: bool = true;
    ENABLED
}

/// Default for `HeadlessConfig::block_third_party`: on.
fn default_block_third_party() -> bool {
    const ENABLED: bool = true;
    ENABLED
}

/// Default for `HeadlessConfig::block_service_workers`: on.
fn default_block_service_workers() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
636
/// Settings stored in the `image_captions` field of [`Config`].
///
/// The container-level `#[serde(default)]` means any key missing from the
/// file is taken from this type's `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default, deny_unknown_fields)]
pub struct ImageCaptionsConfig {
    /// Optional string, `None` by default.
    /// NOTE(review): presumably a key into `Config::captioners` — confirm.
    pub default: Option<String>,
    /// Maximum tokens.
    pub max_tokens: usize,
    /// Maximum count per page.
    pub max_per_page: usize,
    /// Minimum width.
    pub min_width: u32,
    /// Minimum height.
    pub min_height: u32,
    /// Byte limit. Deserialized by `humanbytes_to_u64`, so the file may give
    /// either a non-negative integer or a size string such as `"10MB"`.
    #[serde(deserialize_with = "humanbytes_to_u64")]
    pub max_bytes: u64,
    /// Maximum concurrency.
    pub max_concurrent: usize,
}
650
651impl Default for ImageCaptionsConfig {
652 fn default() -> Self {
653 Self {
654 default: None,
655 max_tokens: 50,
656 max_per_page: 10,
657 min_width: 200,
658 min_height: 200,
659 max_bytes: 10 * 1024 * 1024,
660 max_concurrent: 2,
661 }
662 }
663}
664
/// One entry of the `captioners` map in [`Config`].
///
/// The container-level `#[serde(default)]` makes every key optional: a
/// missing `kind` becomes the empty string and the rest become `None`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(default, deny_unknown_fields)]
pub struct CaptionerConfig {
    /// Captioner kind. Its accepted values are not checked here.
    pub kind: String,
    /// Optional provider name.
    pub provider: Option<String>,
    /// Optional model name.
    pub model: Option<String>,
    /// Optional base URL.
    pub base_url: Option<String>,
    /// Optional string.
    /// NOTE(review): presumably the name of an environment variable holding
    /// the API key — confirm.
    pub api_key_env: Option<String>,
}
675
/// Parses a human-readable byte size such as `"512"`, `"10MB"` or `"1.5 GiB"`.
///
/// Surrounding whitespace is ignored. A string that parses as a `u64` is
/// returned unchanged as a byte count. Otherwise the input must be a number
/// (fractions allowed) followed by a case-insensitive unit:
///
/// * `B`
/// * `K`/`KB`, `M`/`MB`, `G`/`GB` — powers of 1000
/// * `KiB`, `MiB`, `GiB` — powers of 1024
///
/// Fractional results are truncated toward zero.
///
/// # Errors
///
/// Returns a message when there is no unit, the number does not parse, the
/// unit is unknown, or the resulting size is negative, non-finite or too
/// large for a `u64`.
pub fn parse_human_bytes(s: &str) -> Result<u64, String> {
    let s = s.trim();
    if let Ok(n) = s.parse::<u64>() {
        return Ok(n);
    }

    // The unit starts at the first ASCII letter; everything before it is the
    // numeric part.
    let unit_start = s
        .find(|c: char| c.is_ascii_alphabetic())
        .ok_or_else(|| format!("invalid size: {s}"))?;
    let (num_str, unit) = s.split_at(unit_start);

    let num: f64 = num_str
        .trim()
        .parse()
        .map_err(|_| format!("invalid size number: {num_str}"))?;
    let mult: u64 = match unit.trim().to_ascii_uppercase().as_str() {
        "B" => 1,
        "K" | "KB" => 1_000,
        "KIB" => 1_024,
        "M" | "MB" => 1_000_000,
        "MIB" => 1_024 * 1_024,
        "G" | "GB" => 1_000_000_000,
        "GIB" => 1_024 * 1_024 * 1_024,
        other => return Err(format!("unknown size unit: {other}")),
    };

    let bytes = num * mult as f64;
    // A float-to-integer `as` cast saturates silently: negative values would
    // become 0 and oversized ones `u64::MAX`. Reject both instead of letting a
    // bad size through. `u64::MAX as f64` rounds up to 2^64, hence `>=`.
    if !bytes.is_finite() || bytes < 0.0 || bytes >= u64::MAX as f64 {
        return Err(format!("size out of range: {s}"));
    }
    Ok(bytes as u64)
}
703
704fn humanbytes_to_u64<'de, D>(d: D) -> Result<u64, D::Error>
705where
706 D: serde::Deserializer<'de>,
707{
708 use serde::de::Error as _;
709 let v = toml::Value::deserialize(d)?;
710 match v {
711 toml::Value::Integer(n) if n >= 0 => Ok(n as u64),
712 toml::Value::String(s) => parse_human_bytes(&s).map_err(D::Error::custom),
713 other => Err(D::Error::custom(format!(
714 "expected integer bytes or humansize string, got {other:?}",
715 ))),
716 }
717}
718
/// Settings stored in the `ssrf` field of [`Config`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct SsrfConfig {
    /// Level name; falls back to `default_ssrf_level()`. Stored as a
    /// free-form string and not checked by `validate`.
    #[serde(default = "default_ssrf_level")]
    pub level: String,

    /// Project root path; falls back to `default_ssrf_project_root()`.
    #[serde(default = "default_ssrf_project_root")]
    pub project_root: std::path::PathBuf,
}
733
734impl Default for SsrfConfig {
735 fn default() -> Self {
736 Self {
737 level: default_ssrf_level(),
738 project_root: default_ssrf_project_root(),
739 }
740 }
741}
742
/// Default for `SsrfConfig::level`: `"strict"`.
fn default_ssrf_level() -> String {
    String::from("strict")
}

/// Default for `SsrfConfig::project_root`: the relative path `"."`.
fn default_ssrf_project_root() -> std::path::PathBuf {
    PathBuf::from(".")
}
750
/// Settings stored in the `prompt_injection` field of [`Config`].
///
/// None of these fields are checked by `validate`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct PromptInjectionConfig {
    /// Level name; falls back to `default_pi_level()`.
    #[serde(default = "default_pi_level")]
    pub level: String,

    /// Model name; falls back to `default_pi_model()`.
    #[serde(default = "default_pi_model")]
    pub model: String,

    /// Falls back to `default_pi_model_threshold()`.
    /// NOTE(review): presumably a score in `0.0..=1.0` — the range is not
    /// enforced here; confirm.
    #[serde(default = "default_pi_model_threshold")]
    pub model_threshold: f64,

    /// Nested `allowlist` table; all lists empty when absent.
    #[serde(default)]
    pub allowlist: PromptInjectionAllowlist,

    /// Nested `agent_overrides` table; all flags `false` when absent.
    #[serde(default)]
    pub agent_overrides: PromptInjectionOverrides,
}
773
774impl Default for PromptInjectionConfig {
775 fn default() -> Self {
776 Self {
777 level: default_pi_level(),
778 model: default_pi_model(),
779 model_threshold: default_pi_model_threshold(),
780 allowlist: PromptInjectionAllowlist::default(),
781 agent_overrides: PromptInjectionOverrides::default(),
782 }
783 }
784}
785
/// The `allowlist` table of [`PromptInjectionConfig`]: three string lists,
/// each empty when its key is absent.
///
/// NOTE(review): the field names match those of `PromptInjectionOverrides`;
/// the format of the entries (domains, patterns, ...) is not visible here —
/// confirm against the consumer.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct PromptInjectionAllowlist {
    /// Entries for `wrap`.
    #[serde(default)]
    pub wrap: Vec<String>,
    /// Entries for `patterns`.
    #[serde(default)]
    pub patterns: Vec<String>,
    /// Entries for `model`.
    #[serde(default)]
    pub model: Vec<String>,
}
798
/// The `agent_overrides` table of [`PromptInjectionConfig`]: four boolean
/// flags, each `false` when its key is absent.
///
/// NOTE(review): presumably each flag permits an agent to override the
/// setting of the same name — confirm against the consumer.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct PromptInjectionOverrides {
    /// Flag for `wrap`.
    #[serde(default)]
    pub wrap: bool,
    /// Flag for `patterns`.
    #[serde(default)]
    pub patterns: bool,
    /// Flag for `model`.
    #[serde(default)]
    pub model: bool,
    /// Flag for `level`.
    #[serde(default)]
    pub level: bool,
}
813
/// Default for `PromptInjectionConfig::level`: `"moderate"`.
fn default_pi_level() -> String {
    String::from("moderate")
}

/// Default for `PromptInjectionConfig::model`: `"disabled"`.
fn default_pi_model() -> String {
    String::from("disabled")
}

/// Default for `PromptInjectionConfig::model_threshold`: 0.9.
fn default_pi_model_threshold() -> f64 {
    const DEFAULT_THRESHOLD: f64 = 0.9;
    DEFAULT_THRESHOLD
}
823
/// Settings stored in the `debug` field of [`Config`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct DebugConfig {
    /// HAR path; empty string by default (`default_debug_har_path()`).
    /// NOTE(review): presumably an empty path disables HAR output — confirm.
    #[serde(default = "default_debug_har_path")]
    pub har_path: String,

    /// HAR body cap in bytes. Deserialized by `deserialize_humansize`, so the
    /// file may give a non-negative integer or a string with an optional
    /// `B`/`KiB`/`MiB`/`GiB` suffix.
    #[serde(
        default = "default_debug_har_body_cap",
        deserialize_with = "deserialize_humansize"
    )]
    pub har_body_cap: u64,

    /// Log level name; falls back to `default_debug_log_level()`.
    #[serde(default = "default_debug_log_level")]
    pub log_level: String,
}
845
846impl Default for DebugConfig {
847 fn default() -> Self {
848 Self {
849 har_path: default_debug_har_path(),
850 har_body_cap: default_debug_har_body_cap(),
851 log_level: default_debug_log_level(),
852 }
853 }
854}
855
/// Default for `DebugConfig::har_path`: the empty string.
fn default_debug_har_path() -> String {
    String::default()
}

/// Default for `DebugConfig::har_body_cap`: 64 KiB, in bytes.
fn default_debug_har_body_cap() -> u64 {
    const KIB: u64 = 1024;
    64 * KIB
}

/// Default for `DebugConfig::log_level`: `"info"`.
fn default_debug_log_level() -> String {
    String::from("info")
}
867
868fn deserialize_humansize<'de, D>(deserializer: D) -> Result<u64, D::Error>
869where
870 D: serde::Deserializer<'de>,
871{
872 use serde::de::Error as _;
873 let v = toml::Value::deserialize(deserializer)?;
874 match v {
875 toml::Value::Integer(n) if n >= 0 => Ok(n as u64),
876 toml::Value::String(s) => parse_humansize(&s).map_err(D::Error::custom),
877 other => Err(D::Error::custom(format!(
878 "expected integer bytes or humansize string, got {other:?}",
879 ))),
880 }
881}
882
/// Parses a whole-number byte size with an optional binary suffix.
///
/// Accepted forms are a bare integer (`"4096"`) or an integer followed by
/// `B`, `KiB`, `MiB` or `GiB`. Suffixes are case-sensitive, and whitespace
/// around the number and the suffix is ignored.
///
/// # Errors
///
/// Returns a message when the numeric part is not a valid `u64`, the suffix
/// is not recognised, or the resulting byte count does not fit in a `u64`.
fn parse_humansize(s: &str) -> Result<u64, String> {
    let s = s.trim();
    // The suffix starts at the first alphabetic character, if there is one.
    let (num_part, suffix) = match s.find(|c: char| c.is_alphabetic()) {
        Some(i) => s.split_at(i),
        None => (s, ""),
    };
    let n: u64 = num_part
        .trim()
        .parse()
        .map_err(|_| format!("invalid number in `{s}`"))?;
    let mult: u64 = match suffix.trim() {
        "" | "B" => 1,
        "KiB" => 1024,
        "MiB" => 1024 * 1024,
        "GiB" => 1024 * 1024 * 1024,
        other => {
            return Err(format!(
                "unknown size suffix `{other}` (expected KiB|MiB|GiB)"
            ));
        }
    };
    // A plain `n * mult` panics in debug builds and wraps in release builds
    // when the value is too large; report that as an error instead.
    n.checked_mul(mult)
        .ok_or_else(|| format!("size `{s}` does not fit in 64 bits"))
}
906
907pub fn load(path: Option<&Path>) -> Result<Config, ConfigError> {
910 let Some(path) = path else {
911 return Ok(Config::default());
912 };
913
914 let bytes = std::fs::read_to_string(path).map_err(|source| ConfigError::Read {
915 path: path.display().to_string(),
916 source,
917 })?;
918 let mut cfg: Config = toml::from_str(&bytes).map_err(|source| ConfigError::Parse {
919 path: path.display().to_string(),
920 source,
921 })?;
922 validate(&mut cfg).map_err(|message| ConfigError::Invalid {
923 path: path.display().to_string(),
924 message,
925 })?;
926 Ok(cfg)
927}
928
/// Builds the ordered list of config file locations to try.
///
/// When `rover_config_env` is set it is the only candidate. Otherwise the
/// list is `<config_dir>/rover/rover.toml` (if `config_dir` is known)
/// followed by `rover.toml` relative to the current directory. The result is
/// never empty.
fn config_candidates_from(
    rover_config_env: Option<&str>,
    config_dir: Option<&Path>,
) -> Vec<PathBuf> {
    match rover_config_env {
        Some(explicit) => vec![PathBuf::from(explicit)],
        None => config_dir
            .map(|dir| dir.join("rover").join("rover.toml"))
            .into_iter()
            .chain(std::iter::once(PathBuf::from("rover.toml")))
            .collect(),
    }
}
949
950fn config_candidates() -> Vec<PathBuf> {
951 config_candidates_from(
952 std::env::var("ROVER_CONFIG").ok().as_deref(),
953 dirs::config_dir().as_deref(),
954 )
955}
956
957pub fn default_config_path() -> PathBuf {
961 config_candidates()
962 .into_iter()
963 .next()
964 .expect("config_candidates always yields at least one path")
965}
966
967pub fn resolve_existing_config_path() -> Option<PathBuf> {
974 config_candidates().into_iter().find(|p| p.is_file())
975}
976
977pub fn load_resolved(explicit: Option<&Path>) -> Result<Config, ConfigError> {
990 if let Some(path) = explicit {
991 tracing::debug!(path = %path.display(), "loading config from --config");
992 return load(Some(path));
993 }
994 match resolve_existing_config_path() {
995 Some(path) => {
996 tracing::debug!(path = %path.display(), "loading config from resolved default path");
997 load(Some(&path))
998 }
999 None => {
1000 tracing::debug!("no config file found at any default path; using built-in defaults");
1001 Ok(Config::default())
1002 }
1003 }
1004}
1005
/// Test-only variant of `load_resolved` that takes the resolved default path
/// as an argument instead of probing the environment and filesystem.
///
/// `explicit` wins over `resolved_existing`; with neither, the built-in
/// defaults are returned.
#[cfg(test)]
fn load_resolved_from(
    explicit: Option<&Path>,
    resolved_existing: Option<&Path>,
) -> Result<Config, ConfigError> {
    match explicit.or(resolved_existing) {
        Some(path) => load(Some(path)),
        None => Ok(Config::default()),
    }
}
1020
1021fn validate(cfg: &mut Config) -> Result<(), String> {
1022 if cfg.fetch.timeout_secs == 0 {
1023 return Err("fetch.timeout_secs must be > 0".to_string());
1024 }
1025 if cfg.cache.min_ttl > cfg.cache.default_ttl {
1026 return Err(format!(
1027 "cache.min_ttl ({:?}) must be <= cache.default_ttl ({:?})",
1028 cfg.cache.min_ttl, cfg.cache.default_ttl
1029 ));
1030 }
1031 if cfg.cache.default_ttl > cfg.cache.max_ttl {
1032 return Err(format!(
1033 "cache.default_ttl ({:?}) must be <= cache.max_ttl ({:?})",
1034 cfg.cache.default_ttl, cfg.cache.max_ttl
1035 ));
1036 }
1037 for d in &mut cfg.cache.override_no_store_domains {
1038 d.make_ascii_lowercase();
1039 }
1040 if cfg.mcp.heartbeat_interval.is_zero() {
1041 return Err("mcp.heartbeat_interval must be > 0".to_string());
1042 }
1043 if cfg.mcp.reap_threshold.is_zero() {
1044 return Err("mcp.reap_threshold must be > 0".to_string());
1045 }
1046
1047 if cfg.rate_limit.requests_per_minute_per_domain == 0 {
1049 return Err("rate_limit.requests_per_minute_per_domain must be > 0".to_string());
1050 }
1051 if cfg.rate_limit.requests_per_minute_per_domain > 6000 {
1052 return Err(format!(
1053 "rate_limit.requests_per_minute_per_domain ({}) exceeds sanity cap 6000 (100 req/s)",
1054 cfg.rate_limit.requests_per_minute_per_domain
1055 ));
1056 }
1057 if cfg.rate_limit.per_domain_concurrency == 0 {
1058 return Err("rate_limit.per_domain_concurrency must be > 0".to_string());
1059 }
1060 if cfg.rate_limit.global_concurrency == 0 {
1061 return Err("rate_limit.global_concurrency must be > 0".to_string());
1062 }
1063 if cfg.rate_limit.max_retries > 10 {
1064 return Err(format!(
1065 "rate_limit.max_retries ({}) exceeds sanity cap 10",
1066 cfg.rate_limit.max_retries
1067 ));
1068 }
1069 if cfg.rate_limit.initial_backoff > cfg.rate_limit.max_backoff {
1070 return Err(format!(
1071 "rate_limit.initial_backoff ({:?}) must be <= max_backoff ({:?})",
1072 cfg.rate_limit.initial_backoff, cfg.rate_limit.max_backoff
1073 ));
1074 }
1075 if cfg.rate_limit.retry_after_ceiling.is_zero() {
1076 return Err("rate_limit.retry_after_ceiling must be > 0".to_string());
1077 }
1078
1079 for d in &mut cfg.robots.ignore_domains {
1081 d.make_ascii_lowercase();
1082 }
1083 if cfg.robots.failure_ttl > cfg.robots.default_ttl {
1084 return Err(format!(
1085 "robots.failure_ttl ({:?}) must be <= robots.default_ttl ({:?})",
1086 cfg.robots.failure_ttl, cfg.robots.default_ttl
1087 ));
1088 }
1089
1090 Ok(())
1091}
1092
1093#[cfg(test)]
1094mod tests {
1095 use super::*;
1096 use std::io::Write;
1097
1098 #[test]
1099 fn apply_overrides_clamps_concurrency_minimum() {
1100 let mut cfg = Config::default();
1101 cfg.apply_overrides(None, Some(0), Some(0), None, false);
1102 assert_eq!(cfg.rate_limit.per_domain_concurrency, 1);
1103 assert_eq!(cfg.rate_limit.global_concurrency, 1);
1104 }
1105
1106 #[test]
1107 fn apply_overrides_leaves_unset_fields_untouched() {
1108 let mut cfg = Config::default();
1109 let baseline_rpm = cfg.rate_limit.requests_per_minute_per_domain;
1110 let baseline_retries = cfg.rate_limit.max_retries;
1111 let baseline_respect = cfg.robots.respect;
1112 cfg.apply_overrides(None, None, None, None, false);
1113 assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, baseline_rpm);
1114 assert_eq!(cfg.rate_limit.max_retries, baseline_retries);
1115 assert_eq!(cfg.robots.respect, baseline_respect);
1116 }
1117
1118 #[test]
1119 fn apply_overrides_disables_robots_when_requested() {
1120 let mut cfg = Config::default();
1121 cfg.robots.respect = true;
1124 cfg.apply_overrides(None, None, None, None, true);
1125 assert!(!cfg.robots.respect);
1126 }
1127
1128 #[test]
1129 fn apply_overrides_sets_explicit_values() {
1130 let mut cfg = Config::default();
1131 cfg.apply_overrides(Some(30), Some(4), Some(16), Some(5), false);
1132 assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, 30);
1133 assert_eq!(cfg.rate_limit.per_domain_concurrency, 4);
1134 assert_eq!(cfg.rate_limit.global_concurrency, 16);
1135 assert_eq!(cfg.rate_limit.max_retries, 5);
1136 }
1137
1138 #[test]
1139 fn default_config_has_sensible_values() {
1140 let cfg = Config::default();
1141 assert!(cfg.fetch.user_agent.starts_with("Rover/"));
1142 assert_eq!(cfg.fetch.timeout_secs, 15);
1143
1144 assert_eq!(cfg.cache.default_ttl, Duration::from_secs(15 * 60));
1146 assert_eq!(cfg.cache.min_ttl, Duration::from_secs(300));
1147 assert_eq!(cfg.cache.max_ttl, Duration::from_secs(7 * 86400));
1148 assert!(!cfg.cache.override_no_store);
1149 assert!(cfg.cache.override_no_store_domains.is_empty());
1150 assert!(!cfg.cache.store_raw_html);
1151 }
1152
1153 #[test]
1154 fn load_with_no_path_returns_default() {
1155 let cfg = load(None).unwrap();
1156 assert_eq!(cfg.fetch.timeout_secs, 15);
1157 }
1158
1159 #[test]
1160 fn load_from_file_overrides_defaults() {
1161 let mut file = tempfile::NamedTempFile::new().unwrap();
1162 writeln!(
1163 file,
1164 r#"
1165[fetch]
1166user_agent = "test-ua"
1167timeout_secs = 5
1168"#
1169 )
1170 .unwrap();
1171
1172 let cfg = load(Some(file.path())).unwrap();
1173 assert_eq!(cfg.fetch.user_agent, "test-ua");
1174 assert_eq!(cfg.fetch.timeout_secs, 5);
1175 }
1176
1177 #[test]
1178 fn load_missing_file_errors() {
1179 let result = load(Some(Path::new("/no/such/path/__rover_test__.toml")));
1180 assert!(matches!(result, Err(ConfigError::Read { .. })));
1181 }
1182
1183 #[test]
1184 fn load_malformed_toml_errors() {
1185 let mut file = tempfile::NamedTempFile::new().unwrap();
1186 writeln!(file, "not = valid = toml").unwrap();
1187 let result = load(Some(file.path()));
1188 assert!(matches!(result, Err(ConfigError::Parse { .. })));
1189 }
1190
1191 #[test]
1192 fn load_unknown_field_errors() {
1193 let mut file = tempfile::NamedTempFile::new().unwrap();
1194 writeln!(
1195 file,
1196 r#"
1197[fetch]
1198unknown_field = "x"
1199"#
1200 )
1201 .unwrap();
1202 let result = load(Some(file.path()));
1203 assert!(matches!(result, Err(ConfigError::Parse { .. })));
1204 }
1205
1206 #[test]
1207 fn load_unknown_field_in_cache_errors() {
1208 let mut file = tempfile::NamedTempFile::new().unwrap();
1209 writeln!(
1210 file,
1211 r#"
1212[cache]
1213unknown_field = "x"
1214"#
1215 )
1216 .unwrap();
1217 let result = load(Some(file.path()));
1218 assert!(matches!(result, Err(ConfigError::Parse { .. })));
1219 }
1220
1221 #[test]
1222 fn load_rejects_zero_timeout() {
1223 let mut file = tempfile::NamedTempFile::new().unwrap();
1224 writeln!(
1225 file,
1226 r#"
1227[fetch]
1228timeout_secs = 0
1229"#
1230 )
1231 .unwrap();
1232 let result = load(Some(file.path()));
1233 assert!(matches!(result, Err(ConfigError::Invalid { .. })));
1234 }
1235
1236 #[test]
1237 fn load_cache_overrides() {
1238 let mut file = tempfile::NamedTempFile::new().unwrap();
1239 writeln!(
1240 file,
1241 r#"
1242[cache]
1243default_ttl = "30m"
1244min_ttl = "1m"
1245max_ttl = "1d"
1246override_no_store = true
1247override_no_store_domains = ["docs.example.com"]
1248store_raw_html = true
1249"#
1250 )
1251 .unwrap();
1252
1253 let cfg = load(Some(file.path())).unwrap();
1254 assert_eq!(cfg.cache.default_ttl, Duration::from_secs(30 * 60));
1255 assert_eq!(cfg.cache.min_ttl, Duration::from_secs(60));
1256 assert_eq!(cfg.cache.max_ttl, Duration::from_secs(86400));
1257 assert!(cfg.cache.override_no_store);
1258 assert_eq!(
1259 cfg.cache.override_no_store_domains,
1260 vec!["docs.example.com".to_string()]
1261 );
1262 assert!(cfg.cache.store_raw_html);
1263 }
1264
1265 #[test]
1266 fn load_rejects_min_greater_than_default() {
1267 let mut file = tempfile::NamedTempFile::new().unwrap();
1268 writeln!(
1269 file,
1270 r#"
1271[cache]
1272default_ttl = "1m"
1273min_ttl = "10m"
1274"#
1275 )
1276 .unwrap();
1277 let result = load(Some(file.path()));
1278 assert!(matches!(result, Err(ConfigError::Invalid { .. })));
1279 }
1280
1281 #[test]
1282 fn load_rejects_default_greater_than_max() {
1283 let mut file = tempfile::NamedTempFile::new().unwrap();
1284 writeln!(
1285 file,
1286 r#"
1287[cache]
1288default_ttl = "10d"
1289max_ttl = "1d"
1290"#
1291 )
1292 .unwrap();
1293 let result = load(Some(file.path()));
1294 assert!(matches!(result, Err(ConfigError::Invalid { .. })));
1295 }
1296
1297 #[test]
1298 fn override_no_store_domains_normalized_to_lowercase() {
1299 let mut file = tempfile::NamedTempFile::new().unwrap();
1300 writeln!(
1301 file,
1302 r#"
1303[cache]
1304override_no_store_domains = ["DOCS.example.COM", "CDN.foo.com"]
1305"#
1306 )
1307 .unwrap();
1308 let cfg = load(Some(file.path())).unwrap();
1309 assert_eq!(
1310 cfg.cache.override_no_store_domains,
1311 vec!["docs.example.com".to_string(), "cdn.foo.com".to_string()]
1312 );
1313 }
1314
1315 #[test]
1316 fn load_accepts_equal_ttls() {
1317 let mut file = tempfile::NamedTempFile::new().unwrap();
1318 writeln!(
1319 file,
1320 r#"
1321[cache]
1322default_ttl = "1h"
1323min_ttl = "1h"
1324max_ttl = "1h"
1325"#
1326 )
1327 .unwrap();
1328 let cfg = load(Some(file.path())).unwrap();
1329 assert_eq!(cfg.cache.default_ttl, Duration::from_secs(3600));
1330 }
1331
/// The default config selects the `O200k` tokenizer.
#[test]
fn default_tokenizer_is_o200k() {
    assert_eq!(
        Config::default().tokenizer.default,
        crate::tokenizer::Tokenizer::O200k
    );
}
1337
/// The default `[mcp]` settings use a 5 s heartbeat and a 60 s reap threshold.
#[test]
fn default_mcp_intervals() {
    let defaults = Config::default();
    let mcp = &defaults.mcp;
    assert_eq!(mcp.heartbeat_interval, Duration::from_secs(5));
    assert_eq!(mcp.reap_threshold, Duration::from_secs(60));
}
1344
/// `tokenizer.default = "claude"` in the file selects `Tokenizer::Claude`.
#[test]
fn load_tokenizer_override() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[tokenizer]
default = "claude"
"#
    )
    .unwrap();
    let loaded = load(Some(fixture.path())).unwrap();
    let chosen = loaded.tokenizer.default;
    assert_eq!(chosen, crate::tokenizer::Tokenizer::Claude);
}
1359
/// An unrecognised tokenizer name surfaces as `ConfigError::Parse`.
#[test]
fn load_unknown_tokenizer_errors() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[tokenizer]
default = "gpt-5"
"#
    )
    .unwrap();
    assert!(matches!(
        load(Some(fixture.path())),
        Err(ConfigError::Parse { .. })
    ));
}
1374
/// Human-readable durations in `[mcp]` replace the default intervals.
#[test]
fn load_mcp_overrides() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[mcp]
heartbeat_interval = "10s"
reap_threshold = "2m"
"#
    )
    .unwrap();
    let loaded = load(Some(fixture.path())).unwrap();
    let mcp = &loaded.mcp;
    assert_eq!(mcp.heartbeat_interval, Duration::from_secs(10));
    // "2m" is two minutes, i.e. 120 seconds.
    assert_eq!(mcp.reap_threshold, Duration::from_secs(120));
}
1391
/// `output.dir` from the file is exposed as the configured output directory.
#[test]
fn load_output_dir_override() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[output]
dir = "/tmp/rover-out"
"#
    )
    .unwrap();
    let loaded = load(Some(fixture.path())).unwrap();
    let out_dir = loaded.output.dir.as_deref().unwrap();
    assert_eq!(out_dir.to_str(), Some("/tmp/rover-out"));
}
1409
/// A zero-length `mcp.heartbeat_interval` is rejected with
/// `ConfigError::Invalid`.
#[test]
fn load_rejects_zero_heartbeat() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[mcp]
heartbeat_interval = "0s"
"#
    )
    .unwrap();
    assert!(matches!(
        load(Some(fixture.path())),
        Err(ConfigError::Invalid { .. })
    ));
}
1424
/// Pins the default `[rate_limit]` values: 60 rpm per domain, 2 concurrent
/// requests per domain, 8 globally, and 3 retries.
#[test]
fn default_rate_limit_matches_prd() {
    let defaults = Config::default();
    let limits = &defaults.rate_limit;
    assert_eq!(limits.requests_per_minute_per_domain, 60);
    assert_eq!(limits.per_domain_concurrency, 2);
    assert_eq!(limits.global_concurrency, 8);
    assert_eq!(limits.max_retries, 3);
}
1433
/// Pins the default `[robots]` values: `respect` off, no ignored domains,
/// a 24 h default TTL and a 300 s failure TTL.
#[test]
fn default_robots_matches_prd() {
    let defaults = Config::default();
    let robots = &defaults.robots;
    assert!(!robots.respect);
    assert!(robots.ignore_domains.is_empty());
    assert_eq!(robots.default_ttl, Duration::from_secs(24 * 3600));
    assert_eq!(robots.failure_ttl, Duration::from_secs(300));
}
1444
/// Values written to the `[rate_limit]` table must surface on the loaded
/// config.
///
/// Every integer key in the fixture is asserted, including the two
/// concurrency limits, so a loader that silently dropped one of them would be
/// caught. The duration-valued keys are only checked implicitly: `load` must
/// accept them.
#[test]
fn load_rate_limit_overrides() {
    let mut file = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        file,
        r#"
[rate_limit]
requests_per_minute_per_domain = 120
per_domain_concurrency = 4
global_concurrency = 16
max_retries = 5
initial_backoff = "250ms"
max_backoff = "60s"
retry_after_ceiling = "10m"
jitter_seed = 42
"#
    )
    .unwrap();
    let cfg = load(Some(file.path())).unwrap();
    assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, 120);
    assert_eq!(cfg.rate_limit.per_domain_concurrency, 4);
    assert_eq!(cfg.rate_limit.global_concurrency, 16);
    assert_eq!(cfg.rate_limit.max_retries, 5);
    assert_eq!(cfg.rate_limit.jitter_seed, Some(42));
}
1468
/// Values written to the `[robots]` table must replace the defaults.
///
/// `respect` is set to `true` because the default is `false` (see
/// `default_robots_matches_prd`); overriding with the default value would
/// make the assertion unable to detect an ignored key. The mixed-case entry in
/// `ignore_domains` also checks that domains come back lowercased.
#[test]
fn load_robots_overrides() {
    let mut file = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        file,
        r#"
[robots]
respect = true
ignore_domains = ["FOO.example.com", "bar.example.org"]
default_ttl = "12h"
failure_ttl = "2m"
"#
    )
    .unwrap();
    let cfg = load(Some(file.path())).unwrap();
    assert!(cfg.robots.respect);
    assert_eq!(
        cfg.robots.ignore_domains,
        vec!["foo.example.com".to_string(), "bar.example.org".to_string()]
    );
    assert_eq!(cfg.robots.default_ttl, Duration::from_secs(12 * 3600));
    assert_eq!(cfg.robots.failure_ttl, Duration::from_secs(120));
}
1492
/// A per-domain request rate of zero is rejected with `ConfigError::Invalid`.
#[test]
fn load_rejects_zero_rpm() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[rate_limit]
requests_per_minute_per_domain = 0
"#
    )
    .unwrap();
    let outcome = load(Some(fixture.path()));
    assert!(matches!(outcome, Err(ConfigError::Invalid { .. })));
}
1509
/// A per-domain request rate of 100000 is rejected with
/// `ConfigError::Invalid`.
#[test]
fn load_rejects_rpm_above_sanity_cap() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[rate_limit]
requests_per_minute_per_domain = 100000
"#
    )
    .unwrap();
    let outcome = load(Some(fixture.path()));
    assert!(matches!(outcome, Err(ConfigError::Invalid { .. })));
}
1526
/// `rate_limit.max_retries = 11` is rejected with `ConfigError::Invalid`.
#[test]
fn load_rejects_max_retries_above_10() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[rate_limit]
max_retries = 11
"#
    )
    .unwrap();
    let outcome = load(Some(fixture.path()));
    assert!(matches!(outcome, Err(ConfigError::Invalid { .. })));
}
1543
/// An `initial_backoff` longer than `max_backoff` is rejected with
/// `ConfigError::Invalid`.
#[test]
fn load_rejects_backoff_inversion() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[rate_limit]
initial_backoff = "10s"
max_backoff = "5s"
"#
    )
    .unwrap();
    let outcome = load(Some(fixture.path()));
    assert!(matches!(outcome, Err(ConfigError::Invalid { .. })));
}
1561
/// A `robots.failure_ttl` longer than `robots.default_ttl` is rejected with
/// `ConfigError::Invalid`.
#[test]
fn load_rejects_failure_ttl_above_default_ttl() {
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    writeln!(
        fixture,
        r#"
[robots]
default_ttl = "1m"
failure_ttl = "10m"
"#
    )
    .unwrap();
    let outcome = load(Some(fixture.path()));
    assert!(matches!(outcome, Err(ConfigError::Invalid { .. })));
}
1579
/// An empty `[summarization]` table deserializes with every field at its
/// default, including the nested `tables` settings.
#[test]
fn summarization_section_parses_with_defaults() {
    let doc = r#"
[summarization]
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    let summ = &parsed.summarization;
    assert_eq!(summ.default_backend, "default");
    assert_eq!(summ.default_mode, "abstractive");
    assert_eq!(summ.default_style, "prose");
    assert!(summ.fallback_to_extractive);
    assert_eq!(summ.tables.target_tokens, 150);
    assert!(summ.tables.focus.contains("Describe"));
}
1593
/// Keys under `[summarization.tables]` override the table defaults, while the
/// parent section's `default_backend` keeps its default.
#[test]
fn summarization_tables_block_overrides_defaults() {
    let doc = r#"
[summarization.tables]
target_tokens = 250
focus = "Custom table focus prompt."
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    let summ = &parsed.summarization;
    assert_eq!(summ.tables.target_tokens, 250);
    assert_eq!(summ.tables.focus, "Custom table focus prompt.");
    assert_eq!(summ.default_backend, "default");
}
1607
/// A minimal `[backends.default]` entry with only `kind` parses, leaving
/// `provider` unset.
#[test]
fn backends_section_parses_extractive_block() {
    let doc = r#"
[backends.default]
kind = "extractive"
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    assert_eq!(parsed.backends.len(), 1);
    let backend = parsed.backends.get("default").unwrap();
    assert_eq!(backend.kind, "extractive");
    assert!(backend.provider.is_none());
}
1620
/// A `[backends.lm_studio]` entry that sets `kind`, `provider`, `base_url`,
/// `model` and `api_key_env` round-trips into the backend config.
#[test]
fn backends_section_parses_cloud_block_with_all_fields() {
    let doc = r#"
[backends.lm_studio]
kind = "cloud"
provider = "openai_compat"
base_url = "http://localhost:1234/v1"
model = "qwen3.5-0.8b"
api_key_env = "LM_KEY"
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    let backend = parsed.backends.get("lm_studio").unwrap();
    assert_eq!(backend.kind, "cloud");
    assert_eq!(backend.provider.as_deref(), Some("openai_compat"));
    assert_eq!(backend.base_url.as_deref(), Some("http://localhost:1234/v1"));
    assert_eq!(backend.model.as_deref(), Some("qwen3.5-0.8b"));
    assert_eq!(backend.api_key_env.as_deref(), Some("LM_KEY"));
}
1639
/// An empty document yields the default summarization backend and no
/// configured backends.
#[test]
fn missing_summarization_section_yields_defaults() {
    let parsed: Config = toml::from_str("").unwrap();
    assert!(parsed.backends.is_empty());
    assert_eq!(parsed.summarization.default_backend, "default");
}
1646
/// An empty `[ssrf]` table defaults to level `"strict"` and project root `"."`.
#[test]
fn ssrf_section_parses_with_defaults() {
    let doc = r#"
[ssrf]
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    let ssrf = &parsed.ssrf;
    assert_eq!(ssrf.level, "strict");
    assert_eq!(ssrf.project_root, std::path::PathBuf::from("."));
}
1656
/// Each of the five listed SSRF level names deserializes and is stored
/// verbatim.
#[test]
fn ssrf_section_accepts_each_level() {
    ["strict", "loopback", "project", "lan", "none"]
        .iter()
        .for_each(|level| {
            let doc = format!("[ssrf]\nlevel = \"{level}\"\n");
            let parsed: Config = toml::from_str(&doc).unwrap();
            assert_eq!(parsed.ssrf.level, *level);
        });
}
1665
/// An unrecognised key inside `[ssrf]` makes deserialization fail.
#[test]
fn ssrf_section_rejects_unknown_field() {
    let doc = r#"
[ssrf]
level = "strict"
bogus = 1
"#;
    assert!(
        toml::from_str::<Config>(doc).is_err(),
        "expected deny_unknown_fields rejection"
    );
}
1676
/// With no `[ssrf]` table at all, the level still defaults to `"strict"`.
#[test]
fn missing_ssrf_section_yields_defaults() {
    let parsed = toml::from_str::<Config>("").unwrap();
    assert_eq!(parsed.ssrf.level, "strict");
}
1682
/// An empty `[debug]` table defaults to no HAR path, a 64 KiB body cap and
/// the `"info"` log level.
#[test]
fn debug_section_parses_with_defaults() {
    let parsed: Config = toml::from_str("[debug]\n").unwrap();
    let debug = &parsed.debug;
    assert_eq!(debug.har_path, "");
    assert_eq!(debug.har_body_cap, 64 * 1024);
    assert_eq!(debug.log_level, "info");
}
1690
/// `har_body_cap` accepts a human-readable size string such as `"1MiB"`.
#[test]
fn debug_section_har_body_cap_accepts_humansize() {
    let doc = r#"[debug]
har_body_cap = "1MiB"
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    assert_eq!(parsed.debug.har_body_cap, 1024 * 1024);
}
1701
/// `har_body_cap` also accepts a bare integer, interpreted as a byte count.
#[test]
fn debug_section_har_body_cap_accepts_integer_bytes() {
    let doc = r#"[debug]
har_body_cap = 8192
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    assert_eq!(parsed.debug.har_body_cap, 8192);
}
1712
/// An unrecognised key inside `[debug]` makes deserialization fail.
#[test]
fn debug_section_rejects_unknown_field() {
    let doc = r#"[debug]
har_path = ""
bogus = 1
"#;
    assert!(toml::from_str::<Config>(doc).is_err());
}
1723
/// Pins the default image-caption limits: token budget, per-page count,
/// minimum dimensions, byte cap and concurrency.
#[test]
fn image_captions_defaults_match_spec() {
    let defaults = ImageCaptionsConfig::default();
    assert_eq!(defaults.max_tokens, 50);
    assert_eq!(defaults.max_per_page, 10);
    // Minimum width and height share the same 200 threshold.
    assert_eq!(defaults.min_width, 200);
    assert_eq!(defaults.min_height, 200);
    assert_eq!(defaults.max_bytes, 10 * 1024 * 1024);
    assert_eq!(defaults.max_concurrent, 2);
}
1734
/// `parse_human_bytes` handles bare integers, binary (`MiB`/`GiB`) and decimal
/// (`MB`) suffixes, fractional values, and rejects non-numeric text.
#[test]
fn human_bytes_parses_common_forms() {
    // Inputs whose expected byte count is an exact integer.
    let exact: [(&str, u64); 3] = [
        ("1024", 1024),
        ("10MiB", 10 * 1024 * 1024),
        ("10MB", 10_000_000),
    ];
    for &(text, bytes) in &exact {
        assert_eq!(parse_human_bytes(text).unwrap(), bytes);
    }

    // Fractional input: the expectation is the truncated floating-point product.
    let one_and_a_half_gib = (1.5_f64 * 1024.0 * 1024.0 * 1024.0) as u64;
    assert_eq!(parse_human_bytes("1.5GiB").unwrap(), one_and_a_half_gib);

    assert!(parse_human_bytes("bogus").is_err());
}
1746
/// Keys set in `[image_captions]` override the defaults, and the unset
/// `max_tokens` keeps its default of 50.
#[test]
fn image_captions_deserializes_from_toml() {
    let doc = r#"
[image_captions]
default = "openai"
max_per_page = 5
min_width = 100
min_height = 100
max_bytes = "1MiB"
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    let captions = &parsed.image_captions;
    assert_eq!(captions.default.as_deref(), Some("openai"));
    assert_eq!(captions.max_per_page, 5);
    assert_eq!(captions.max_bytes, 1024 * 1024);
    // Not present in the document, so the default applies.
    assert_eq!(captions.max_tokens, 50);
}
1763
/// Two `[captioners.*]` tables produce two map entries keyed by table name.
#[test]
fn captioners_block_round_trips() {
    let doc = r#"
[captioners.openai]
kind = "cloud"
provider = "openai"
model = "gpt-4o-mini"
api_key_env = "OPENAI_API_KEY"

[captioners.local]
kind = "local"
model = "HuggingFaceTB/SmolVLM-256M-Instruct"
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    let captioners = &parsed.captioners;
    assert_eq!(captioners.len(), 2);

    let cloud = captioners.get("openai").unwrap();
    assert_eq!(cloud.provider.as_deref(), Some("openai"));

    let local = captioners.get("local").unwrap();
    assert_eq!(local.kind, "local");
}
1785
/// Pins the `HeadlessConfig` defaults: 4 concurrent sessions, no explicit
/// Chrome executable, and a 2 s launch delay.
#[test]
fn headless_m9_keys_default_correctly() {
    let defaults = HeadlessConfig::default();
    assert_eq!(defaults.max_concurrent, 4);
    assert!(defaults.chrome_executable.is_empty());
    assert_eq!(defaults.launch_delay_secs, 2);
    // The accessor must agree with the raw seconds field.
    let expected_delay = std::time::Duration::from_secs(2);
    assert_eq!(defaults.launch_delay(), expected_delay);
}
1794
/// `launch_delay_secs = 0` yields a zero launch delay, and a non-zero value is
/// reflected by `launch_delay()`.
#[test]
fn headless_launch_delay_parses_and_disables() {
    let disabled: Config = toml::from_str("[headless]\nlaunch_delay_secs = 0\n").unwrap();
    assert_eq!(disabled.headless.launch_delay_secs, 0);
    assert!(disabled.headless.launch_delay().is_zero());

    let delayed: Config = toml::from_str("[headless]\nlaunch_delay_secs = 5\n").unwrap();
    let five_secs = std::time::Duration::from_secs(5);
    assert_eq!(delayed.headless.launch_delay(), five_secs);
}
1806
/// With no `[prompt_injection]` table, the level is `"moderate"`, the model is
/// `"disabled"`, the threshold is 0.9, all allowlists are empty and all agent
/// overrides are off.
#[test]
fn prompt_injection_defaults_when_absent() {
    let parsed: Config = toml::from_str("").unwrap();
    let pi = &parsed.prompt_injection;

    assert_eq!(pi.level, "moderate");
    assert_eq!(pi.model, "disabled");
    assert!((pi.model_threshold - 0.9).abs() < f64::EPSILON);

    let allow = &pi.allowlist;
    assert!(allow.wrap.is_empty());
    assert!(allow.patterns.is_empty());
    assert!(allow.model.is_empty());

    let overrides = &pi.agent_overrides;
    assert!(!overrides.wrap);
    assert!(!overrides.patterns);
    assert!(!overrides.model);
    assert!(!overrides.level);
}
1821
/// A fully populated `[prompt_injection]` section, with its `allowlist` and
/// `agent_overrides` sub-tables, deserializes field by field.
#[test]
fn prompt_injection_parses_full_block() {
    let doc = r#"
[prompt_injection]
level = "strict"
model = "deberta-base"
model_threshold = 0.75

[prompt_injection.allowlist]
wrap = ["https://*.internal.example.com/*"]
patterns = ["*"]
model = []

[prompt_injection.agent_overrides]
wrap = true
patterns = false
model = true
level = true
"#;
    let parsed: Config = toml::from_str(doc).unwrap();
    let pi = &parsed.prompt_injection;

    assert_eq!(pi.level, "strict");
    assert_eq!(pi.model, "deberta-base");
    assert!((pi.model_threshold - 0.75).abs() < f64::EPSILON);

    let wrap_globs = vec!["https://*.internal.example.com/*".to_string()];
    assert_eq!(pi.allowlist.wrap, wrap_globs);
    let pattern_globs = vec!["*".to_string()];
    assert_eq!(pi.allowlist.patterns, pattern_globs);

    let overrides = &pi.agent_overrides;
    assert!(overrides.wrap);
    assert!(!overrides.patterns);
    assert!(overrides.model);
    assert!(overrides.level);
}
1858
/// An unrecognised key inside `[prompt_injection]` makes deserialization fail.
#[test]
fn prompt_injection_rejects_unknown_field() {
    let outcome = toml::from_str::<Config>("[prompt_injection]\nbogus = 1\n");
    assert!(outcome.is_err(), "expected deny_unknown_fields rejection");
}
1865
/// When an explicit path is supplied as the first argument, it is the only
/// candidate returned, even if a config directory is also given.
#[test]
fn config_candidates_prefers_rover_config_env_as_sole_candidate() {
    let config_dir = Path::new("/cfg");
    let candidates = config_candidates_from(Some("/custom/x.toml"), Some(config_dir));
    let expected = vec![std::path::PathBuf::from("/custom/x.toml")];
    assert_eq!(candidates, expected);
}
1871
/// With no explicit path, the candidates are `<config dir>/rover/rover.toml`
/// followed by the relative `rover.toml`.
#[test]
fn config_candidates_searches_platform_then_cwd() {
    let candidates = config_candidates_from(None, Some(Path::new("/cfg")));
    let platform = std::path::PathBuf::from("/cfg/rover/rover.toml");
    let cwd_relative = std::path::PathBuf::from("rover.toml");
    assert_eq!(candidates, vec![platform, cwd_relative]);
}
1883
/// With neither an explicit path nor a config directory, the only candidate
/// is the relative `rover.toml`.
#[test]
fn config_candidates_falls_back_to_cwd_rover_toml() {
    let candidates = config_candidates_from(None, None);
    let expected = vec![std::path::PathBuf::from("rover.toml")];
    assert_eq!(candidates, expected);
}
1889
/// When the platform-directory config file exists, it is the first candidate
/// that resolves to a real file.
#[test]
fn resolve_existing_prefers_platform_over_cwd_candidate() {
    let root = tempfile::tempdir().unwrap();
    // Lay out `<root>/rover/rover.toml`, mirroring the platform candidate path.
    let platform_file = root.path().join("rover").join("rover.toml");
    std::fs::create_dir_all(platform_file.parent().unwrap()).unwrap();
    std::fs::write(&platform_file, "[fetch]\ntimeout_secs = 3\n").unwrap();

    let candidates = config_candidates_from(None, Some(root.path()));
    let first_existing = candidates.into_iter().find(|candidate| candidate.is_file());
    assert_eq!(first_existing, Some(platform_file));
}
1904
/// With an empty config directory, no candidate resolves to an existing file.
// NOTE(review): the relative `rover.toml` candidate is checked against the
// process working directory, so this presumably relies on no such file being
// present where the tests run — confirm.
#[test]
fn resolve_existing_is_none_when_no_candidate_exists() {
    let root = tempfile::tempdir().unwrap();
    let candidates = config_candidates_from(None, Some(root.path()));
    let first_existing = candidates.into_iter().find(|candidate| candidate.is_file());
    assert_eq!(first_existing, None);
}
1914
/// An explicit path that exists is loaded and its values are used.
#[test]
fn load_resolved_uses_explicit_path_when_present() {
    let mut explicit = tempfile::NamedTempFile::new().unwrap();
    writeln!(explicit, "[fetch]\ntimeout_secs = 7\n").unwrap();
    let loaded = load_resolved_from(Some(explicit.path()), None).unwrap();
    assert_eq!(loaded.fetch.timeout_secs, 7);
}
1923
/// A missing explicit path is a `ConfigError::Read` error, even when a valid
/// resolved default file is available as the second argument.
#[test]
fn load_resolved_errors_when_explicit_path_missing() {
    let mut fallback = tempfile::NamedTempFile::new().unwrap();
    writeln!(fallback, "[fetch]\ntimeout_secs = 9\n").unwrap();
    let missing = Path::new("/no/such/__rover_explicit__.toml");
    let outcome = load_resolved_from(Some(missing), Some(fallback.path()));
    assert!(matches!(outcome, Err(ConfigError::Read { .. })));
}
1936
/// Without an explicit path, the resolved default file is loaded.
#[test]
fn load_resolved_loads_resolved_default_when_no_explicit() {
    let mut resolved = tempfile::NamedTempFile::new().unwrap();
    writeln!(resolved, "[fetch]\ntimeout_secs = 11\n").unwrap();
    let loaded = load_resolved_from(None, Some(resolved.path())).unwrap();
    assert_eq!(loaded.fetch.timeout_secs, 11);
}
1944
/// With neither an explicit nor a resolved path, loading succeeds and the
/// fetch timeout equals `default_timeout_secs()`.
#[test]
fn load_resolved_falls_back_to_defaults_when_nothing_resolves() {
    let loaded = load_resolved_from(None, None).unwrap();
    let expected_timeout = default_timeout_secs();
    assert_eq!(loaded.fetch.timeout_secs, expected_timeout);
}
1950}