Skip to main content

rover/config/
mod.rs

1//! Configuration loading.
2//!
3//! M1 covers a tiny subset of the full schema documented in PRD §12.
4//! Subsequent milestones extend this struct.
5
6pub mod edit;
7pub mod provenance;
8
9use serde::{Deserialize, Serialize};
10use std::path::{Path, PathBuf};
11use std::time::Duration;
12use thiserror::Error;
13
/// Errors produced while loading a configuration file.
///
/// Every variant carries the display form of the offending path so the
/// rendered message identifies which file failed.
#[derive(Debug, Error)]
pub enum ConfigError {
    /// The file could not be read from disk (`load` maps the
    /// `std::fs::read_to_string` failure into this variant).
    #[error("failed to read config at {path}: {source}")]
    Read {
        path: String,
        source: std::io::Error,
    },

    /// The file was read but `toml::from_str` could not deserialize it into
    /// a `Config`.
    #[error("failed to parse config at {path}: {source}")]
    Parse {
        path: String,
        source: toml::de::Error,
    },

    /// The file deserialized, but `validate` rejected one of its values;
    /// `message` is the text returned by `validate`.
    #[error("invalid config at {path}: {message}")]
    Invalid { path: String, message: String },
}
31
/// Root of the TOML configuration file.
///
/// Every section is optional: a missing table falls back to that section's
/// `Default` implementation. Unknown top-level keys are rejected at parse
/// time (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct Config {
    /// `[fetch]`: user agent and request timeout.
    #[serde(default)]
    pub fetch: FetchConfig,

    /// `[ssrf]`: SSRF level and project root.
    #[serde(default)]
    pub ssrf: SsrfConfig,

    /// `[debug]`: HAR recording and log-level overrides.
    #[serde(default)]
    pub debug: DebugConfig,

    /// `[cache]`: TTL bounds and storage switches.
    #[serde(default)]
    pub cache: CacheConfig,

    /// `[tokenizer]`: default tokenizer family.
    #[serde(default)]
    pub tokenizer: TokenizerConfig,

    /// `[mcp]`: MCP server intervals.
    #[serde(default)]
    pub mcp: McpConfig,

    /// `[output]`: optional output directory.
    #[serde(default)]
    pub output: OutputConfig,

    /// `[rate_limit]`: per-domain pacing, concurrency and retry knobs.
    #[serde(default)]
    pub rate_limit: RateLimitConfig,

    /// `[robots]`: robots.txt fetch + respect knobs.
    #[serde(default)]
    pub robots: RobotsConfig,

    /// `[summarization]`: summarization defaults.
    #[serde(default)]
    pub summarization: SummarizationConfig,

    /// `[backends.<name>]` blocks, keyed by `<name>`.
    #[serde(default)]
    pub backends: std::collections::HashMap<String, BackendConfig>,

    /// `[headless]`: headless-browser fetch knobs.
    #[serde(default)]
    pub headless: HeadlessConfig,

    /// `[image_captions]`: image-captioning defaults.
    #[serde(default)]
    pub image_captions: ImageCaptionsConfig,

    /// `[captioners.<name>]` blocks, keyed by `<name>` (ordered map).
    #[serde(default)]
    pub captioners: std::collections::BTreeMap<String, CaptionerConfig>,

    /// `[prompt_injection]`: prompt-injection guard settings.
    #[serde(default)]
    pub prompt_injection: PromptInjectionConfig,
}
80
/// `[fetch]` section: settings applied to outgoing fetch requests.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct FetchConfig {
    /// User-agent string. When omitted, `default_user_agent` supplies a
    /// `Rover/<crate version>` value.
    #[serde(default = "default_user_agent")]
    pub user_agent: String,

    /// Request timeout in seconds. Stored as u64 for TOML friendliness.
    /// Defaults to 15; `validate` rejects 0.
    #[serde(default = "default_timeout_secs")]
    pub timeout_secs: u64,
}
91
92impl Default for FetchConfig {
93    fn default() -> Self {
94        Self {
95            user_agent: default_user_agent(),
96            timeout_secs: default_timeout_secs(),
97        }
98    }
99}
100
101impl FetchConfig {
102    pub fn timeout(&self) -> Duration {
103        Duration::from_secs(self.timeout_secs)
104    }
105}
106
107impl Config {
108    /// Apply CLI / MCP override flags onto an already-loaded config.
109    ///
110    /// Centralises the override logic shared by `rover fetch`, `rover mcp`, and
111    /// (M6) `rover batch`. Bypasses `config::validate`; concurrency widths are
112    /// clamped to >=1 to avoid `Semaphore::new(0)` silently hanging on acquire
113    /// (regression fix from M5 commit 02bd7e8).
114    pub fn apply_overrides(
115        &mut self,
116        rate_limit_rpm: Option<u32>,
117        per_host_concurrency: Option<u32>,
118        global_concurrency: Option<u32>,
119        max_retries: Option<u8>,
120        ignore_robots: bool,
121    ) {
122        if let Some(v) = rate_limit_rpm {
123            self.rate_limit.requests_per_minute_per_domain = v;
124        }
125        if let Some(v) = per_host_concurrency {
126            self.rate_limit.per_domain_concurrency = v.max(1);
127        }
128        if let Some(v) = global_concurrency {
129            self.rate_limit.global_concurrency = v.max(1);
130        }
131        if let Some(v) = max_retries {
132            self.rate_limit.max_retries = v;
133        }
134        if ignore_robots {
135            self.robots.respect = false;
136        }
137    }
138
139    /// Test-only convenience for swapping the SSRF level on an
140    /// already-loaded config. Production callers go through TOML.
141    #[cfg(any(test, feature = "test-loopback"))]
142    pub fn with_ssrf_level(mut self, level: &str) -> Self {
143        self.ssrf.level = level.to_string();
144        self
145    }
146}
147
148fn default_user_agent() -> String {
149    format!(
150        "Rover/{} (+https://github.com/aaronbassett/rover)",
151        env!("CARGO_PKG_VERSION")
152    )
153}
154
/// Default `fetch.timeout_secs`.
fn default_timeout_secs() -> u64 {
    const FETCH_TIMEOUT_SECS: u64 = 15;
    FETCH_TIMEOUT_SECS
}
158
/// Cache configuration. All durations are parsed by `humantime` (e.g. "1h",
/// "5m", "7d", "30s"). Defaults follow PRD §12.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct CacheConfig {
    /// Default: 15 minutes. `validate` requires `min_ttl <= default_ttl`.
    // NOTE(review): presumably the TTL applied when the response carries no
    // explicit max-age — confirm against the cache layer.
    #[serde(default = "default_cache_default_ttl", with = "humantime_serde")]
    pub default_ttl: Duration,

    /// Default: 300 seconds.
    #[serde(default = "default_cache_min_ttl", with = "humantime_serde")]
    pub min_ttl: Duration,

    /// Default: 7 days.
    #[serde(default = "default_cache_max_ttl", with = "humantime_serde")]
    pub max_ttl: Duration,

    /// Stale-while-revalidate grace window. When a cache entry expired no
    /// more than this long ago, `fetch_with_cache` may serve the stale row
    /// and queue a background `revalidate` task. Beyond this window the
    /// row is treated as a cache miss and re-fetched synchronously, so
    /// callers never receive arbitrarily old content from the cache.
    /// Default: 5 minutes.
    #[serde(default = "default_cache_swr_window", with = "humantime_serde")]
    pub stale_while_revalidate_window: Duration,

    /// Default: `false`.
    // NOTE(review): presumably allows caching responses marked `no-store`
    // for every domain — confirm against the cache layer.
    #[serde(default)]
    pub override_no_store: bool,

    /// Default: empty.
    // NOTE(review): presumably the per-domain variant of `override_no_store`
    // — confirm against the cache layer.
    #[serde(default)]
    pub override_no_store_domains: Vec<String>,

    /// When true, store the gzipped raw HTML alongside the extracted Markdown.
    /// Disabled by default to keep the database small.
    #[serde(default)]
    pub store_raw_html: bool,
}
193
194impl Default for CacheConfig {
195    fn default() -> Self {
196        Self {
197            default_ttl: default_cache_default_ttl(),
198            min_ttl: default_cache_min_ttl(),
199            max_ttl: default_cache_max_ttl(),
200            stale_while_revalidate_window: default_cache_swr_window(),
201            override_no_store: false,
202            override_no_store_domains: vec![],
203            store_raw_html: false,
204        }
205    }
206}
207
/// Default `cache.default_ttl`: 15 minutes.
///
/// Tightened from 1h so that, absent an explicit `Cache-Control` max-age, a
/// cache poisoned with stale or attacker-influenced content has a short blast
/// radius before the next revalidation. Origins that want longer caching can
/// still say so via response headers.
fn default_cache_default_ttl() -> Duration {
    const MINUTES: u64 = 15;
    Duration::from_secs(MINUTES * 60)
}

/// Default `cache.min_ttl`: 300 seconds.
fn default_cache_min_ttl() -> Duration {
    const SECS: u64 = 300;
    Duration::from_secs(SECS)
}

/// Default `cache.max_ttl`: 7 days.
fn default_cache_max_ttl() -> Duration {
    const DAYS: u64 = 7;
    const SECS_PER_DAY: u64 = 86400;
    Duration::from_secs(DAYS * SECS_PER_DAY)
}

/// Default `cache.stale_while_revalidate_window`: 5 minutes.
fn default_cache_swr_window() -> Duration {
    const MINUTES: u64 = 5;
    Duration::from_secs(MINUTES * 60)
}
227
/// Tokenizer configuration. The `default` family is used for token counting
/// in the frontmatter and the MCP layer when callers don't specify one.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct TokenizerConfig {
    /// Tokenizer family used when none is specified; `default_tokenizer`
    /// supplies `Tokenizer::O200k` when the key is omitted.
    #[serde(default = "default_tokenizer")]
    pub default: crate::tokenizer::Tokenizer,
}
236
237impl Default for TokenizerConfig {
238    fn default() -> Self {
239        Self {
240            default: default_tokenizer(),
241        }
242    }
243}
244
245fn default_tokenizer() -> crate::tokenizer::Tokenizer {
246    crate::tokenizer::Tokenizer::O200k
247}
248
/// MCP server configuration. Durations are parsed by `humantime`
/// (e.g. "5s", "60s", "2m"). Both intervals must be non-zero.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct McpConfig {
    /// Default: 5 seconds.
    #[serde(default = "default_heartbeat_interval", with = "humantime_serde")]
    pub heartbeat_interval: Duration,

    /// Default: 60 seconds.
    // NOTE(review): presumably the heartbeat silence after which a session
    // is reaped — confirm against the MCP server.
    #[serde(default = "default_reap_threshold", with = "humantime_serde")]
    pub reap_threshold: Duration,
}
260
261impl Default for McpConfig {
262    fn default() -> Self {
263        Self {
264            heartbeat_interval: default_heartbeat_interval(),
265            reap_threshold: default_reap_threshold(),
266        }
267    }
268}
269
/// Default `mcp.heartbeat_interval`: 5 seconds.
fn default_heartbeat_interval() -> Duration {
    const SECS: u64 = 5;
    Duration::from_secs(SECS)
}

/// Default `mcp.reap_threshold`: 60 seconds.
fn default_reap_threshold() -> Duration {
    const SECS: u64 = 60;
    Duration::from_secs(SECS)
}
277
/// Output configuration. When `dir` is `None`, `ROVER_OUTPUT_DIR` (if set)
/// takes precedence, otherwise the platform `data_local_dir()/rover/output`
/// default applies. See `OutputPaths::resolve`.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct OutputConfig {
    /// Explicit output directory; `None` (the default) defers to the
    /// fallbacks described above.
    #[serde(default)]
    pub dir: Option<std::path::PathBuf>,
}
287
/// Per-domain pacing knobs. All HTTP-bound code paths run through a single
/// `Pacer` built from this struct at startup. See M5 design spec §3 and §4.
///
/// Durations are parsed by `humantime`. Unknown keys are rejected at parse
/// time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct RateLimitConfig {
    /// Default: 60.
    #[serde(default = "default_rpm_per_domain")]
    pub requests_per_minute_per_domain: u32,

    /// Default: 2. `Config::apply_overrides` clamps CLI overrides to >= 1.
    #[serde(default = "default_per_domain_concurrency")]
    pub per_domain_concurrency: u32,

    /// Default: 8. `Config::apply_overrides` clamps CLI overrides to >= 1.
    #[serde(default = "default_global_concurrency")]
    pub global_concurrency: u32,

    /// Default: 3.
    #[serde(default = "default_max_retries")]
    pub max_retries: u8,

    /// Default: 500 ms.
    #[serde(default = "default_initial_backoff", with = "humantime_serde")]
    pub initial_backoff: Duration,

    /// Default: 30 seconds.
    #[serde(default = "default_max_backoff", with = "humantime_serde")]
    pub max_backoff: Duration,

    /// Default: 300 seconds.
    // NOTE(review): presumably the cap applied to a server-provided
    // `Retry-After` — confirm against the pacer.
    #[serde(default = "default_retry_after_ceiling", with = "humantime_serde")]
    pub retry_after_ceiling: Duration,

    /// Deterministic seed for the backoff jitter RNG. `None` (default) means
    /// entropy; set in tests to make timing assertions reproducible.
    #[serde(default)]
    pub jitter_seed: Option<u64>,

    /// Threshold (seconds) above which a server-provided `Retry-After`
    /// converts a synchronous fetch into a deferred `retry` task instead of
    /// sleeping in-line. See M6 design §3. Default: 30.
    #[serde(default = "default_deferred_threshold_secs")]
    pub deferred_retry_threshold_secs: u64,
}
325
326impl Default for RateLimitConfig {
327    fn default() -> Self {
328        Self {
329            requests_per_minute_per_domain: default_rpm_per_domain(),
330            per_domain_concurrency: default_per_domain_concurrency(),
331            global_concurrency: default_global_concurrency(),
332            max_retries: default_max_retries(),
333            initial_backoff: default_initial_backoff(),
334            max_backoff: default_max_backoff(),
335            retry_after_ceiling: default_retry_after_ceiling(),
336            jitter_seed: None,
337            deferred_retry_threshold_secs: default_deferred_threshold_secs(),
338        }
339    }
340}
341
/// Default `rate_limit.requests_per_minute_per_domain`.
fn default_rpm_per_domain() -> u32 {
    const REQUESTS_PER_MINUTE: u32 = 60;
    REQUESTS_PER_MINUTE
}

/// Default `rate_limit.per_domain_concurrency`.
fn default_per_domain_concurrency() -> u32 {
    const PER_DOMAIN_WIDTH: u32 = 2;
    PER_DOMAIN_WIDTH
}

/// Default `rate_limit.global_concurrency`.
fn default_global_concurrency() -> u32 {
    const GLOBAL_WIDTH: u32 = 8;
    GLOBAL_WIDTH
}

/// Default `rate_limit.max_retries`.
fn default_max_retries() -> u8 {
    const RETRIES: u8 = 3;
    RETRIES
}

/// Default `rate_limit.initial_backoff`: 500 ms.
fn default_initial_backoff() -> Duration {
    const MILLIS: u64 = 500;
    Duration::from_millis(MILLIS)
}

/// Default `rate_limit.max_backoff`: 30 seconds.
fn default_max_backoff() -> Duration {
    const SECS: u64 = 30;
    Duration::from_secs(SECS)
}

/// Default `rate_limit.retry_after_ceiling`: 300 seconds.
fn default_retry_after_ceiling() -> Duration {
    const SECS: u64 = 300;
    Duration::from_secs(SECS)
}

/// Default `rate_limit.deferred_retry_threshold_secs`.
fn default_deferred_threshold_secs() -> u64 {
    const THRESHOLD_SECS: u64 = 30;
    THRESHOLD_SECS
}
366
/// Robots.txt fetch + respect knobs.
///
/// Durations are parsed by `humantime`. Unknown keys are rejected at parse
/// time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct RobotsConfig {
    /// Whether robots.txt rules are enforced. Defaults to `false`;
    /// `Config::apply_overrides` also forces it to `false` when the
    /// `ignore_robots` flag is passed.
    #[serde(default = "default_respect")]
    pub respect: bool,

    /// Hosts for which robots.txt is not fetched and rules are not enforced.
    /// Lowercased in-place by `validate`.
    #[serde(default)]
    pub ignore_domains: Vec<String>,

    /// Used when the robots.txt HTTP response has no `Cache-Control: max-age`.
    /// Default: 24 hours.
    #[serde(default = "default_robots_ttl", with = "humantime_serde")]
    pub default_ttl: Duration,

    /// Used when robots.txt fetch failed with 5xx or transport error (fail-closed).
    /// Short by design so a recovered server is picked up quickly.
    /// Default: 5 minutes.
    #[serde(default = "default_robots_failure_ttl", with = "humantime_serde")]
    pub failure_ttl: Duration,
}
388
389impl Default for RobotsConfig {
390    fn default() -> Self {
391        Self {
392            respect: default_respect(),
393            ignore_domains: Vec::new(),
394            default_ttl: default_robots_ttl(),
395            failure_ttl: default_robots_failure_ttl(),
396        }
397    }
398}
399
/// Default `robots.respect`: enforcement is off.
///
/// Rover is an agent's browser, not a spider or scraper: it fetches the page
/// a user/agent explicitly asked for, one at a time. robots.txt governs
/// automated crawling, so the gate defaults off and rate limits remain the
/// only throttle. Set `robots.respect = true` to opt into enforcement.
fn default_respect() -> bool {
    const RESPECT_ROBOTS: bool = false;
    RESPECT_ROBOTS
}

/// Default `robots.default_ttl`: 24 hours.
fn default_robots_ttl() -> Duration {
    const HOURS: u64 = 24;
    Duration::from_secs(HOURS * 3600)
}

/// Default `robots.failure_ttl`: 5 minutes.
fn default_robots_failure_ttl() -> Duration {
    const MINUTES: u64 = 5;
    Duration::from_secs(MINUTES * 60)
}
413
/// Top-level `[summarization]` section.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct SummarizationConfig {
    /// Default: `"default"`.
    // NOTE(review): presumably a key into `Config::backends` — confirm
    // against the summarizer registry.
    #[serde(default = "default_summarization_backend")]
    pub default_backend: String,

    /// Free-form string here. Default: `"abstractive"`.
    #[serde(default = "default_summarization_mode")]
    pub default_mode: String,

    /// Free-form string here. Default: `"prose"`.
    #[serde(default = "default_summarization_style")]
    pub default_style: String,

    /// Default: `true`.
    #[serde(default = "default_summarization_fallback")]
    pub fallback_to_extractive: bool,

    /// Per-table summarization defaults consumed by the
    /// `TablesMode::Summarize` hook in `mcp::tools::fetch`. Lives under
    /// `[summarization.tables]` in the config file.
    #[serde(default)]
    pub tables: TablesSummarizationConfig,
}
436
437impl Default for SummarizationConfig {
438    fn default() -> Self {
439        Self {
440            default_backend: default_summarization_backend(),
441            default_mode: default_summarization_mode(),
442            default_style: default_summarization_style(),
443            fallback_to_extractive: default_summarization_fallback(),
444            tables: TablesSummarizationConfig::default(),
445        }
446    }
447}
448
/// Default `summarization.default_backend`.
fn default_summarization_backend() -> String {
    String::from("default")
}

/// Default `summarization.default_mode`.
fn default_summarization_mode() -> String {
    String::from("abstractive")
}

/// Default `summarization.default_style`.
fn default_summarization_style() -> String {
    String::from("prose")
}

/// Default `summarization.fallback_to_extractive`.
fn default_summarization_fallback() -> bool {
    const FALL_BACK: bool = true;
    FALL_BACK
}
461
/// `[summarization.tables]` block. Controls the per-table summarize
/// defaults used by the `TablesMode::Summarize` hook.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct TablesSummarizationConfig {
    /// Default: 150.
    #[serde(default = "default_tables_target_tokens")]
    pub target_tokens: usize,
    /// Instruction text; defaults to a prompt asking what the table shows
    /// and to highlight extreme values or notable rows.
    #[serde(default = "default_tables_focus")]
    pub focus: String,
}
472
473impl Default for TablesSummarizationConfig {
474    fn default() -> Self {
475        Self {
476            target_tokens: default_tables_target_tokens(),
477            focus: default_tables_focus(),
478        }
479    }
480}
481
/// Default `summarization.tables.target_tokens`.
fn default_tables_target_tokens() -> usize {
    const TARGET_TOKENS: usize = 150;
    TARGET_TOKENS
}

/// Default `summarization.tables.focus`.
fn default_tables_focus() -> String {
    String::from("Describe what this table shows. Highlight any extreme values or notable rows.")
}
488
/// One `[backends.<name>]` block. Free-form `kind`/`provider` strings —
/// validation lives in `summarizer::registry::build` where the parsed
/// values are matched against the typed enum.
///
/// `kind` carries no serde default, so it must be present in every block;
/// the remaining keys are optional. Unknown keys are rejected at parse time
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
#[serde(deny_unknown_fields)]
pub struct BackendConfig {
    /// Backend kind (required; free-form here).
    pub kind: String,
    /// Provider name (free-form here).
    #[serde(default)]
    pub provider: Option<String>,
    #[serde(default)]
    pub model: Option<String>,
    #[serde(default)]
    pub base_url: Option<String>,
    // NOTE(review): presumably the NAME of the environment variable holding
    // the API key, not the key itself — confirm against the registry.
    #[serde(default)]
    pub api_key_env: Option<String>,
}
505
/// `[headless]` configuration block. M9 adds browser/headless-fetch knobs.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct HeadlessConfig {
    /// Default: 4.
    // NOTE(review): presumably the cap on simultaneous headless renders —
    // confirm against `HeadlessRenderer`.
    #[serde(default = "default_headless_max_concurrent")]
    pub max_concurrent: usize,

    /// Path to a Chrome/Chromium executable. Empty string means auto-detect.
    #[serde(default)]
    pub chrome_executable: String,

    /// Fulfill image requests with empty 200 (saves bandwidth + render time).
    /// Default: `true`.
    #[serde(default = "default_block_images")]
    pub block_images: bool,

    /// Fulfill font requests with empty 200. Default: `true`.
    #[serde(default = "default_block_fonts")]
    pub block_fonts: bool,

    /// Fulfill audio/video/track requests with empty 200. Default: `true`.
    #[serde(default = "default_block_media")]
    pub block_media: bool,

    /// Fulfill CSS requests with empty 200. Default `false` — many SPAs need
    /// layout to render correctly.
    #[serde(default)]
    pub block_css: bool,

    /// Fulfill third-party analytics/tracker requests with empty 200.
    /// Default: `true`.
    #[serde(default = "default_block_third_party")]
    pub block_third_party: bool,

    /// Disable service workers at browser init via CDP bypass. Honored by
    /// `HeadlessRenderer` setup (not by the intercept handler).
    /// Default: `true`.
    #[serde(default = "default_block_service_workers")]
    pub block_service_workers: bool,

    /// Default wait condition: `"domcontentloaded"` or `"networkidle0"`
    /// (wait for the network to fully settle — captures post-load XHR content).
    /// Defaults to `"domcontentloaded"`.
    #[serde(default = "default_headless_wait")]
    pub default_wait: String,

    /// Per-render timeout in seconds (covers the wait phase). Default: 15.
    #[serde(default = "default_headless_timeout_secs")]
    pub timeout_secs: u64,

    /// Whether `HeadlessMode::Auto` should run the SPA detection heuristic.
    /// Default: `true`.
    #[serde(default = "default_auto_detect_spa")]
    pub auto_detect_spa: bool,
}
556
557impl HeadlessConfig {
558    /// Render timeout as a `Duration`.
559    pub fn timeout(&self) -> std::time::Duration {
560        std::time::Duration::from_secs(self.timeout_secs)
561    }
562}
563
564impl Default for HeadlessConfig {
565    fn default() -> Self {
566        Self {
567            max_concurrent: default_headless_max_concurrent(),
568            chrome_executable: String::new(),
569            block_images: default_block_images(),
570            block_fonts: default_block_fonts(),
571            block_media: default_block_media(),
572            block_css: false,
573            block_third_party: default_block_third_party(),
574            block_service_workers: default_block_service_workers(),
575            default_wait: default_headless_wait(),
576            timeout_secs: default_headless_timeout_secs(),
577            auto_detect_spa: default_auto_detect_spa(),
578        }
579    }
580}
581
/// Default `headless.max_concurrent`.
fn default_headless_max_concurrent() -> usize {
    const MAX_CONCURRENT: usize = 4;
    MAX_CONCURRENT
}

/// Default `headless.default_wait`.
fn default_headless_wait() -> String {
    String::from("domcontentloaded")
}

/// Default `headless.timeout_secs`.
fn default_headless_timeout_secs() -> u64 {
    const RENDER_TIMEOUT_SECS: u64 = 15;
    RENDER_TIMEOUT_SECS
}

/// Default `headless.auto_detect_spa`.
fn default_auto_detect_spa() -> bool {
    const AUTO_DETECT: bool = true;
    AUTO_DETECT
}

/// Default `headless.block_images`.
fn default_block_images() -> bool {
    const BLOCK: bool = true;
    BLOCK
}

/// Default `headless.block_fonts`.
fn default_block_fonts() -> bool {
    const BLOCK: bool = true;
    BLOCK
}

/// Default `headless.block_media`.
fn default_block_media() -> bool {
    const BLOCK: bool = true;
    BLOCK
}

/// Default `headless.block_third_party`.
fn default_block_third_party() -> bool {
    const BLOCK: bool = true;
    BLOCK
}

/// Default `headless.block_service_workers`.
fn default_block_service_workers() -> bool {
    const BLOCK: bool = true;
    BLOCK
}
617
/// `[image_captions]` defaults block.
///
/// The container-level `#[serde(default)]` fills any omitted key from this
/// type's `Default` implementation. Unknown keys are rejected at parse time
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default, deny_unknown_fields)]
pub struct ImageCaptionsConfig {
    /// Default: `None`.
    // NOTE(review): presumably the name of a `[captioners.<name>]` block —
    // confirm against the captioner registry.
    pub default: Option<String>,
    /// Default: 50.
    pub max_tokens: usize,
    /// Default: 10.
    pub max_per_page: usize,
    /// Default: 200.
    pub min_width: u32,
    /// Default: 200.
    pub min_height: u32,
    /// Byte limit; accepts either a non-negative integer or a human-readable
    /// size string (e.g. "10MiB") via `humanbytes_to_u64`. Default: 10 MiB.
    #[serde(deserialize_with = "humanbytes_to_u64")]
    pub max_bytes: u64,
    /// Default: 2.
    pub max_concurrent: usize,
}
631
632impl Default for ImageCaptionsConfig {
633    fn default() -> Self {
634        Self {
635            default: None,
636            max_tokens: 50,
637            max_per_page: 10,
638            min_width: 200,
639            min_height: 200,
640            max_bytes: 10 * 1024 * 1024,
641            max_concurrent: 2,
642        }
643    }
644}
645
/// `[captioners.<name>]` block. Mirrors `BackendConfig` (M7).
///
/// Unlike `BackendConfig`, the container-level `#[serde(default)]` makes
/// every key optional here — an omitted `kind` deserializes as the empty
/// string. Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(default, deny_unknown_fields)]
pub struct CaptionerConfig {
    /// Captioner kind (free-form here; empty when omitted).
    pub kind: String,
    pub provider: Option<String>,
    pub model: Option<String>,
    pub base_url: Option<String>,
    // NOTE(review): presumably the NAME of the environment variable holding
    // the API key, not the key itself — confirm against the captioner code.
    pub api_key_env: Option<String>,
}
656
/// Parse a human-readable byte size string such as "10MiB", "1.5GiB", "1000"
/// into a raw `u64` byte count.
///
/// Accepted forms (surrounding whitespace is ignored):
/// - a bare non-negative integer, taken as bytes;
/// - a decimal number followed by a unit. Units are case-insensitive:
///   `B`, `K`/`KB` (1000), `KiB` (1024), `M`/`MB`, `MiB`, `G`/`GB`, `GiB`.
///
/// Fractional results are truncated toward zero.
///
/// # Errors
///
/// Returns a message when the input has no unit and is not an integer, the
/// numeric part does not parse, the unit is unknown, the number is negative
/// or non-finite, or the resulting byte count does not fit in a `u64`.
pub fn parse_human_bytes(s: &str) -> Result<u64, String> {
    let s = s.trim();
    // Fast path: a bare integer is already a byte count.
    if let Ok(n) = s.parse::<u64>() {
        return Ok(n);
    }
    // Split at the first letter: everything before is the number, the rest
    // is the unit.
    let (num_str, unit) = s
        .find(|c: char| c.is_ascii_alphabetic())
        .map(|i| s.split_at(i))
        .ok_or_else(|| format!("invalid size: {s}"))?;
    let num: f64 = num_str
        .trim()
        .parse()
        .map_err(|_| format!("invalid size number: {num_str}"))?;
    // A float-to-int `as` cast saturates silently, which would turn "-1MB"
    // into 0 bytes; reject such inputs explicitly instead.
    if !num.is_finite() || num < 0.0 {
        return Err(format!(
            "size must be a non-negative finite number: {num_str}"
        ));
    }
    let mult: u64 = match unit.trim().to_ascii_uppercase().as_str() {
        "B" => 1,
        "K" | "KB" => 1_000,
        "KIB" => 1_024,
        "M" | "MB" => 1_000_000,
        "MIB" => 1_024 * 1_024,
        "G" | "GB" => 1_000_000_000,
        "GIB" => 1_024 * 1_024 * 1_024,
        other => return Err(format!("unknown size unit: {other}")),
    };
    let bytes = num * mult as f64;
    // `u64::MAX as f64` rounds up to 2^64, so anything at or above it cannot
    // be represented and would otherwise saturate to `u64::MAX`.
    if bytes >= u64::MAX as f64 {
        return Err(format!("size out of range: {s}"));
    }
    Ok(bytes as u64)
}
684
685fn humanbytes_to_u64<'de, D>(d: D) -> Result<u64, D::Error>
686where
687    D: serde::Deserializer<'de>,
688{
689    use serde::de::Error as _;
690    let v = toml::Value::deserialize(d)?;
691    match v {
692        toml::Value::Integer(n) if n >= 0 => Ok(n as u64),
693        toml::Value::String(s) => parse_human_bytes(&s).map_err(D::Error::custom),
694        other => Err(D::Error::custom(format!(
695            "expected integer bytes or humansize string, got {other:?}",
696        ))),
697    }
698}
699
/// Top-level `[ssrf]` section. M8 introduces this — earlier milestones
/// hardcoded `SsrfLevel::Strict`. The `level` field is a free-form string
/// here so the file accepts unknown levels with a typed error from the
/// fetcher rather than a serde error; `validate_url`/`validate_addresses`
/// reject malformed levels at first use.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct SsrfConfig {
    /// SSRF level as a free-form string. Default: `"strict"`.
    #[serde(default = "default_ssrf_level")]
    pub level: String,

    /// Default: `"."`.
    // NOTE(review): how this root is used by the SSRF checks is not visible
    // in this module — confirm against the fetcher.
    #[serde(default = "default_ssrf_project_root")]
    pub project_root: std::path::PathBuf,
}
714
715impl Default for SsrfConfig {
716    fn default() -> Self {
717        Self {
718            level: default_ssrf_level(),
719            project_root: default_ssrf_project_root(),
720        }
721    }
722}
723
/// Default `ssrf.level`.
fn default_ssrf_level() -> String {
    String::from("strict")
}

/// Default `ssrf.project_root`: the relative path `"."`.
fn default_ssrf_project_root() -> std::path::PathBuf {
    let here = ".";
    std::path::PathBuf::from(here)
}
731
/// Top-level `[prompt_injection]` section. `level` and `model` are free-form
/// strings here (mirroring `SsrfConfig.level`); `guard::GuardConfig::from_config`
/// parses them into typed enums at first use, surfacing a typed error rather
/// than a serde error.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct PromptInjectionConfig {
    /// Guard level as a free-form string. Default: `"moderate"`.
    #[serde(default = "default_pi_level")]
    pub level: String,

    /// Guard model as a free-form string. Default: `"disabled"`.
    #[serde(default = "default_pi_model")]
    pub model: String,

    /// Default: 0.9.
    // NOTE(review): presumably the model score at or above which content is
    // flagged — confirm against `guard::GuardConfig`.
    #[serde(default = "default_pi_model_threshold")]
    pub model_threshold: f64,

    /// Per-method URL-glob allowlists (`[prompt_injection.allowlist]`).
    #[serde(default)]
    pub allowlist: PromptInjectionAllowlist,

    /// Per-method agent-override grants
    /// (`[prompt_injection.agent_overrides]`).
    #[serde(default)]
    pub agent_overrides: PromptInjectionOverrides,
}
754
755impl Default for PromptInjectionConfig {
756    fn default() -> Self {
757        Self {
758            level: default_pi_level(),
759            model: default_pi_model(),
760            model_threshold: default_pi_model_threshold(),
761            allowlist: PromptInjectionAllowlist::default(),
762            agent_overrides: PromptInjectionOverrides::default(),
763        }
764    }
765}
766
/// Per-method URL-glob allowlists. A URL matching the glob list skips that
/// method on OUTPUT for that URL. A bare `"*"` disables the method entirely.
///
/// Every list defaults to empty. Unknown keys are rejected at parse time
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct PromptInjectionAllowlist {
    /// URL globs exempt from the `wrap` method.
    #[serde(default)]
    pub wrap: Vec<String>,
    /// URL globs exempt from the `patterns` method.
    #[serde(default)]
    pub patterns: Vec<String>,
    /// URL globs exempt from the `model` method.
    #[serde(default)]
    pub model: Vec<String>,
}
779
/// Per-method agent-override grants (default: all deny). The MCP `security`
/// arg is honored for a method only when its grant here is `true`.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct PromptInjectionOverrides {
    /// Grant for the `wrap` method. Default: `false`.
    #[serde(default)]
    pub wrap: bool,
    /// Grant for the `patterns` method. Default: `false`.
    #[serde(default)]
    pub patterns: bool,
    /// Grant for the `model` method. Default: `false`.
    #[serde(default)]
    pub model: bool,
    /// Default: `false`.
    // NOTE(review): presumably lets the agent override the guard `level` —
    // confirm against `guard::GuardConfig`.
    #[serde(default)]
    pub level: bool,
}
794
/// Default `prompt_injection.level`.
fn default_pi_level() -> String {
    String::from("moderate")
}

/// Default `prompt_injection.model`.
fn default_pi_model() -> String {
    String::from("disabled")
}

/// Default `prompt_injection.model_threshold`.
fn default_pi_model_threshold() -> f64 {
    const THRESHOLD: f64 = 0.9;
    THRESHOLD
}
804
/// Top-level `[debug]` section. M8 introduces this for HAR recording and
/// log-level overrides.
///
/// `har_body_cap` accepts either a raw integer (bytes) or a humansize
/// string like "64KiB" / "1MiB" via a custom deserializer. The internal
/// representation is `u64` bytes.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct DebugConfig {
    /// Default: empty string.
    // NOTE(review): presumably an empty path means HAR recording is off —
    // confirm against the HAR recorder.
    #[serde(default = "default_debug_har_path")]
    pub har_path: String,

    /// Size in bytes. Default: 64 KiB (`64 * 1024`).
    #[serde(
        default = "default_debug_har_body_cap",
        deserialize_with = "deserialize_humansize"
    )]
    pub har_body_cap: u64,

    /// Log level as a free-form string. Default: `"info"`.
    #[serde(default = "default_debug_log_level")]
    pub log_level: String,
}
826
827impl Default for DebugConfig {
828    fn default() -> Self {
829        Self {
830            har_path: default_debug_har_path(),
831            har_body_cap: default_debug_har_body_cap(),
832            log_level: default_debug_log_level(),
833        }
834    }
835}
836
/// Default `debug.har_path`: the empty string.
fn default_debug_har_path() -> String {
    String::default()
}

/// Default `debug.har_body_cap`: 64 KiB, in bytes.
fn default_debug_har_body_cap() -> u64 {
    const KIB: u64 = 1024;
    64 * KIB
}

/// Default `debug.log_level`.
fn default_debug_log_level() -> String {
    String::from("info")
}
848
849fn deserialize_humansize<'de, D>(deserializer: D) -> Result<u64, D::Error>
850where
851    D: serde::Deserializer<'de>,
852{
853    use serde::de::Error as _;
854    let v = toml::Value::deserialize(deserializer)?;
855    match v {
856        toml::Value::Integer(n) if n >= 0 => Ok(n as u64),
857        toml::Value::String(s) => parse_humansize(&s).map_err(D::Error::custom),
858        other => Err(D::Error::custom(format!(
859            "expected integer bytes or humansize string, got {other:?}",
860        ))),
861    }
862}
863
/// Parse a size string such as "64KiB", "1MiB" or "4096" into bytes.
///
/// The numeric part must be a non-negative integer. The suffix is
/// case-sensitive and must be empty, `B`, `KiB`, `MiB` or `GiB` (binary
/// multiples only). Surrounding whitespace is ignored.
///
/// # Errors
///
/// Returns a message when the number does not parse, the suffix is unknown,
/// or the resulting byte count does not fit in a `u64`.
fn parse_humansize(s: &str) -> Result<u64, String> {
    let s = s.trim();
    // Split at the first letter; with no letter the whole input is the
    // number and the suffix is empty.
    let (num_part, suffix) = s
        .find(|c: char| c.is_alphabetic())
        .map(|i| s.split_at(i))
        .unwrap_or((s, ""));
    let n: u64 = num_part
        .trim()
        .parse()
        .map_err(|_| format!("invalid number in `{s}`"))?;
    let mult: u64 = match suffix.trim() {
        "" | "B" => 1,
        "KiB" => 1024,
        "MiB" => 1024 * 1024,
        "GiB" => 1024 * 1024 * 1024,
        other => {
            return Err(format!(
                "unknown size suffix `{other}` (expected KiB|MiB|GiB)"
            ));
        }
    };
    // A plain `n * mult` panics in debug builds and wraps in release builds
    // for oversized inputs; report the overflow as a config error instead.
    n.checked_mul(mult)
        .ok_or_else(|| format!("size `{s}` does not fit in 64 bits"))
}
887
888/// Load config. If `path` is provided, the file must exist and parse cleanly.
889/// If `path` is None, return defaults.
890pub fn load(path: Option<&Path>) -> Result<Config, ConfigError> {
891    let Some(path) = path else {
892        return Ok(Config::default());
893    };
894
895    let bytes = std::fs::read_to_string(path).map_err(|source| ConfigError::Read {
896        path: path.display().to_string(),
897        source,
898    })?;
899    let mut cfg: Config = toml::from_str(&bytes).map_err(|source| ConfigError::Parse {
900        path: path.display().to_string(),
901        source,
902    })?;
903    validate(&mut cfg).map_err(|message| ConfigError::Invalid {
904        path: path.display().to_string(),
905        message,
906    })?;
907    Ok(cfg)
908}
909
/// Ordered config-file candidates searched when `--config` is absent.
///
/// A set `ROVER_CONFIG` (`rover_config_env`) is the sole candidate: an
/// explicit redirect should not silently fall through to other locations.
/// Without it, the platform config dir (`<config_dir>/rover/rover.toml`) is
/// tried first when one is known, followed by a project-local
/// `./rover.toml`. The result is never empty.
fn config_candidates_from(
    rover_config_env: Option<&str>,
    config_dir: Option<&Path>,
) -> Vec<PathBuf> {
    match rover_config_env {
        Some(redirect) => vec![PathBuf::from(redirect)],
        None => config_dir
            .map(|dir| dir.join("rover").join("rover.toml"))
            .into_iter()
            .chain(std::iter::once(PathBuf::from("rover.toml")))
            .collect(),
    }
}
930
931fn config_candidates() -> Vec<PathBuf> {
932    config_candidates_from(
933        std::env::var("ROVER_CONFIG").ok().as_deref(),
934        dirs::config_dir().as_deref(),
935    )
936}
937
938/// The canonical config path: where `rover config set` creates a new file, and
939/// where `rover config show` reports when no file exists yet. This is the first
940/// (highest-precedence) candidate, regardless of whether it exists on disk.
941pub fn default_config_path() -> PathBuf {
942    config_candidates()
943        .into_iter()
944        .next()
945        .expect("config_candidates always yields at least one path")
946}
947
948/// The first existing config file among the ordered candidates, or `None` when
949/// none exists (built-in defaults apply).
950///
951/// Shared by the runtime subcommands and by `config show` / `config set` so all
952/// of them agree on which file is "the active config" — closing the footgun
953/// where `config set` wrote a file the runtime never read.
954pub fn resolve_existing_config_path() -> Option<PathBuf> {
955    config_candidates().into_iter().find(|p| p.is_file())
956}
957
958/// Load the effective config, resolving the default path when `--config` is
959/// absent.
960///
961/// - `Some(path)`: an explicitly requested file. It MUST exist and parse — a
962///   typo in `--config` fails loudly rather than silently falling back to
963///   defaults.
964/// - `None`: search the default candidates (`ROVER_CONFIG`, then the platform
965///   config dir, then `./rover.toml`) and load the first that exists; if none
966///   exists, fall back to built-in defaults (the config file is optional).
967///
968/// Runtime subcommands call this instead of [`load`] so a saved config file is
969/// honored without requiring `--config` on every invocation.
970pub fn load_resolved(explicit: Option<&Path>) -> Result<Config, ConfigError> {
971    if let Some(path) = explicit {
972        tracing::debug!(path = %path.display(), "loading config from --config");
973        return load(Some(path));
974    }
975    match resolve_existing_config_path() {
976        Some(path) => {
977            tracing::debug!(path = %path.display(), "loading config from resolved default path");
978            load(Some(&path))
979        }
980        None => {
981            tracing::debug!("no config file found at any default path; using built-in defaults");
982            Ok(Config::default())
983        }
984    }
985}
986
987/// Pure core shared with the public [`load_resolved`], with the resolved
988/// "active config" path injected so both branches are unit-testable without
989/// touching process env or the real config dir.
990#[cfg(test)]
991fn load_resolved_from(
992    explicit: Option<&Path>,
993    resolved_existing: Option<&Path>,
994) -> Result<Config, ConfigError> {
995    match (explicit, resolved_existing) {
996        (Some(path), _) => load(Some(path)),
997        (None, Some(path)) => load(Some(path)),
998        (None, None) => Ok(Config::default()),
999    }
1000}
1001
1002fn validate(cfg: &mut Config) -> Result<(), String> {
1003    if cfg.fetch.timeout_secs == 0 {
1004        return Err("fetch.timeout_secs must be > 0".to_string());
1005    }
1006    if cfg.cache.min_ttl > cfg.cache.default_ttl {
1007        return Err(format!(
1008            "cache.min_ttl ({:?}) must be <= cache.default_ttl ({:?})",
1009            cfg.cache.min_ttl, cfg.cache.default_ttl
1010        ));
1011    }
1012    if cfg.cache.default_ttl > cfg.cache.max_ttl {
1013        return Err(format!(
1014            "cache.default_ttl ({:?}) must be <= cache.max_ttl ({:?})",
1015            cfg.cache.default_ttl, cfg.cache.max_ttl
1016        ));
1017    }
1018    for d in &mut cfg.cache.override_no_store_domains {
1019        d.make_ascii_lowercase();
1020    }
1021    if cfg.mcp.heartbeat_interval.is_zero() {
1022        return Err("mcp.heartbeat_interval must be > 0".to_string());
1023    }
1024    if cfg.mcp.reap_threshold.is_zero() {
1025        return Err("mcp.reap_threshold must be > 0".to_string());
1026    }
1027
1028    // RateLimitConfig
1029    if cfg.rate_limit.requests_per_minute_per_domain == 0 {
1030        return Err("rate_limit.requests_per_minute_per_domain must be > 0".to_string());
1031    }
1032    if cfg.rate_limit.requests_per_minute_per_domain > 6000 {
1033        return Err(format!(
1034            "rate_limit.requests_per_minute_per_domain ({}) exceeds sanity cap 6000 (100 req/s)",
1035            cfg.rate_limit.requests_per_minute_per_domain
1036        ));
1037    }
1038    if cfg.rate_limit.per_domain_concurrency == 0 {
1039        return Err("rate_limit.per_domain_concurrency must be > 0".to_string());
1040    }
1041    if cfg.rate_limit.global_concurrency == 0 {
1042        return Err("rate_limit.global_concurrency must be > 0".to_string());
1043    }
1044    if cfg.rate_limit.max_retries > 10 {
1045        return Err(format!(
1046            "rate_limit.max_retries ({}) exceeds sanity cap 10",
1047            cfg.rate_limit.max_retries
1048        ));
1049    }
1050    if cfg.rate_limit.initial_backoff > cfg.rate_limit.max_backoff {
1051        return Err(format!(
1052            "rate_limit.initial_backoff ({:?}) must be <= max_backoff ({:?})",
1053            cfg.rate_limit.initial_backoff, cfg.rate_limit.max_backoff
1054        ));
1055    }
1056    if cfg.rate_limit.retry_after_ceiling.is_zero() {
1057        return Err("rate_limit.retry_after_ceiling must be > 0".to_string());
1058    }
1059
1060    // RobotsConfig
1061    for d in &mut cfg.robots.ignore_domains {
1062        d.make_ascii_lowercase();
1063    }
1064    if cfg.robots.failure_ttl > cfg.robots.default_ttl {
1065        return Err(format!(
1066            "robots.failure_ttl ({:?}) must be <= robots.default_ttl ({:?})",
1067            cfg.robots.failure_ttl, cfg.robots.default_ttl
1068        ));
1069    }
1070
1071    Ok(())
1072}
1073
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    // ---- helpers -----------------------------------------------------------

    /// Write `contents` (plus a trailing newline) to a fresh temp file and
    /// return its handle. Callers must keep the handle alive for as long as
    /// they need the path.
    fn temp_config(contents: &str) -> tempfile::NamedTempFile {
        let mut file = tempfile::NamedTempFile::new().unwrap();
        writeln!(file, "{contents}").unwrap();
        file
    }

    /// Run [`load`] against a temp file holding `contents`.
    fn load_toml(contents: &str) -> Result<Config, ConfigError> {
        let file = temp_config(contents);
        load(Some(file.path()))
    }

    // ---- apply_overrides ---------------------------------------------------

    #[test]
    fn apply_overrides_clamps_concurrency_minimum() {
        let mut cfg = Config::default();
        cfg.apply_overrides(None, Some(0), Some(0), None, false);
        assert_eq!(cfg.rate_limit.per_domain_concurrency, 1);
        assert_eq!(cfg.rate_limit.global_concurrency, 1);
    }

    #[test]
    fn apply_overrides_leaves_unset_fields_untouched() {
        let mut cfg = Config::default();
        let baseline_rpm = cfg.rate_limit.requests_per_minute_per_domain;
        let baseline_retries = cfg.rate_limit.max_retries;
        let baseline_respect = cfg.robots.respect;
        cfg.apply_overrides(None, None, None, None, false);
        assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, baseline_rpm);
        assert_eq!(cfg.rate_limit.max_retries, baseline_retries);
        assert_eq!(cfg.robots.respect, baseline_respect);
    }

    #[test]
    fn apply_overrides_disables_robots_when_requested() {
        let mut cfg = Config::default();
        // Start enabled so the assertion proves the override flips it, not just
        // that it matches the (now off-by-default) baseline.
        cfg.robots.respect = true;
        cfg.apply_overrides(None, None, None, None, true);
        assert!(!cfg.robots.respect);
    }

    #[test]
    fn apply_overrides_sets_explicit_values() {
        let mut cfg = Config::default();
        cfg.apply_overrides(Some(30), Some(4), Some(16), Some(5), false);
        assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, 30);
        assert_eq!(cfg.rate_limit.per_domain_concurrency, 4);
        assert_eq!(cfg.rate_limit.global_concurrency, 16);
        assert_eq!(cfg.rate_limit.max_retries, 5);
    }

    // ---- defaults and basic loading ----------------------------------------

    #[test]
    fn default_config_has_sensible_values() {
        let cfg = Config::default();
        assert!(cfg.fetch.user_agent.starts_with("Rover/"));
        assert_eq!(cfg.fetch.timeout_secs, 15);

        // Cache defaults per PRD §12 (default_ttl tightened to 15m).
        assert_eq!(cfg.cache.default_ttl, Duration::from_secs(15 * 60));
        assert_eq!(cfg.cache.min_ttl, Duration::from_secs(300));
        assert_eq!(cfg.cache.max_ttl, Duration::from_secs(7 * 86400));
        assert!(!cfg.cache.override_no_store);
        assert!(cfg.cache.override_no_store_domains.is_empty());
        assert!(!cfg.cache.store_raw_html);
    }

    #[test]
    fn load_with_no_path_returns_default() {
        let cfg = load(None).unwrap();
        assert_eq!(cfg.fetch.timeout_secs, 15);
    }

    #[test]
    fn load_from_file_overrides_defaults() {
        let cfg = load_toml(
            r#"
[fetch]
user_agent = "test-ua"
timeout_secs = 5
"#,
        )
        .unwrap();
        assert_eq!(cfg.fetch.user_agent, "test-ua");
        assert_eq!(cfg.fetch.timeout_secs, 5);
    }

    #[test]
    fn load_missing_file_errors() {
        let result = load(Some(Path::new("/no/such/path/__rover_test__.toml")));
        assert!(matches!(result, Err(ConfigError::Read { .. })));
    }

    #[test]
    fn load_malformed_toml_errors() {
        let result = load_toml("not = valid = toml");
        assert!(matches!(result, Err(ConfigError::Parse { .. })));
    }

    #[test]
    fn load_unknown_field_errors() {
        let result = load_toml(
            r#"
[fetch]
unknown_field = "x"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Parse { .. })));
    }

    #[test]
    fn load_unknown_field_in_cache_errors() {
        let result = load_toml(
            r#"
[cache]
unknown_field = "x"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Parse { .. })));
    }

    #[test]
    fn load_rejects_zero_timeout() {
        let result = load_toml(
            r#"
[fetch]
timeout_secs = 0
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    // ---- cache -------------------------------------------------------------

    #[test]
    fn load_cache_overrides() {
        let cfg = load_toml(
            r#"
[cache]
default_ttl = "30m"
min_ttl = "1m"
max_ttl = "1d"
override_no_store = true
override_no_store_domains = ["docs.example.com"]
store_raw_html = true
"#,
        )
        .unwrap();
        assert_eq!(cfg.cache.default_ttl, Duration::from_secs(30 * 60));
        assert_eq!(cfg.cache.min_ttl, Duration::from_secs(60));
        assert_eq!(cfg.cache.max_ttl, Duration::from_secs(86400));
        assert!(cfg.cache.override_no_store);
        assert_eq!(
            cfg.cache.override_no_store_domains,
            vec!["docs.example.com".to_string()]
        );
        assert!(cfg.cache.store_raw_html);
    }

    #[test]
    fn load_rejects_min_greater_than_default() {
        let result = load_toml(
            r#"
[cache]
default_ttl = "1m"
min_ttl = "10m"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn load_rejects_default_greater_than_max() {
        let result = load_toml(
            r#"
[cache]
default_ttl = "10d"
max_ttl = "1d"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn override_no_store_domains_normalized_to_lowercase() {
        let cfg = load_toml(
            r#"
[cache]
override_no_store_domains = ["DOCS.example.COM", "CDN.foo.com"]
"#,
        )
        .unwrap();
        assert_eq!(
            cfg.cache.override_no_store_domains,
            vec!["docs.example.com".to_string(), "cdn.foo.com".to_string()]
        );
    }

    #[test]
    fn load_accepts_equal_ttls() {
        let cfg = load_toml(
            r#"
[cache]
default_ttl = "1h"
min_ttl = "1h"
max_ttl = "1h"
"#,
        )
        .unwrap();
        assert_eq!(cfg.cache.default_ttl, Duration::from_secs(3600));
    }

    // ---- tokenizer / mcp / output ------------------------------------------

    #[test]
    fn default_tokenizer_is_o200k() {
        let cfg = Config::default();
        assert_eq!(cfg.tokenizer.default, crate::tokenizer::Tokenizer::O200k);
    }

    #[test]
    fn default_mcp_intervals() {
        let cfg = Config::default();
        assert_eq!(cfg.mcp.heartbeat_interval, Duration::from_secs(5));
        assert_eq!(cfg.mcp.reap_threshold, Duration::from_secs(60));
    }

    #[test]
    fn load_tokenizer_override() {
        let cfg = load_toml(
            r#"
[tokenizer]
default = "claude"
"#,
        )
        .unwrap();
        assert_eq!(cfg.tokenizer.default, crate::tokenizer::Tokenizer::Claude);
    }

    #[test]
    fn load_unknown_tokenizer_errors() {
        let result = load_toml(
            r#"
[tokenizer]
default = "gpt-5"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Parse { .. })));
    }

    #[test]
    fn load_mcp_overrides() {
        let cfg = load_toml(
            r#"
[mcp]
heartbeat_interval = "10s"
reap_threshold = "2m"
"#,
        )
        .unwrap();
        assert_eq!(cfg.mcp.heartbeat_interval, Duration::from_secs(10));
        assert_eq!(cfg.mcp.reap_threshold, Duration::from_secs(120));
    }

    #[test]
    fn load_output_dir_override() {
        let cfg = load_toml(
            r#"
[output]
dir = "/tmp/rover-out"
"#,
        )
        .unwrap();
        assert_eq!(
            cfg.output.dir.as_deref().unwrap().to_str(),
            Some("/tmp/rover-out")
        );
    }

    #[test]
    fn load_rejects_zero_heartbeat() {
        let result = load_toml(
            r#"
[mcp]
heartbeat_interval = "0s"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn load_rejects_zero_reap_threshold() {
        let result = load_toml(
            r#"
[mcp]
reap_threshold = "0s"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    // ---- rate limit / robots -----------------------------------------------

    #[test]
    fn default_rate_limit_matches_prd() {
        let cfg = Config::default();
        assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, 60);
        assert_eq!(cfg.rate_limit.per_domain_concurrency, 2);
        assert_eq!(cfg.rate_limit.global_concurrency, 8);
        assert_eq!(cfg.rate_limit.max_retries, 3);
    }

    #[test]
    fn default_robots_matches_prd() {
        let cfg = Config::default();
        // Rover is an agent browser, not a crawler: robots enforcement is off
        // by default (opt in with `robots.respect = true`).
        assert!(!cfg.robots.respect);
        assert!(cfg.robots.ignore_domains.is_empty());
        assert_eq!(cfg.robots.default_ttl, Duration::from_secs(24 * 3600));
        assert_eq!(cfg.robots.failure_ttl, Duration::from_secs(300));
    }

    #[test]
    fn load_rate_limit_overrides() {
        let cfg = load_toml(
            r#"
[rate_limit]
requests_per_minute_per_domain = 120
per_domain_concurrency = 4
global_concurrency = 16
max_retries = 5
initial_backoff = "250ms"
max_backoff = "60s"
retry_after_ceiling = "10m"
jitter_seed = 42
"#,
        )
        .unwrap();
        assert_eq!(cfg.rate_limit.requests_per_minute_per_domain, 120);
        assert_eq!(cfg.rate_limit.per_domain_concurrency, 4);
        assert_eq!(cfg.rate_limit.global_concurrency, 16);
        assert_eq!(cfg.rate_limit.max_retries, 5);
        assert_eq!(cfg.rate_limit.jitter_seed, Some(42));
    }

    #[test]
    fn load_robots_overrides() {
        // `respect` defaults to false, so set it to true here: asserting the
        // default value would not prove the key is actually read from the file.
        let cfg = load_toml(
            r#"
[robots]
respect = true
ignore_domains = ["FOO.example.com", "bar.example.org"]
default_ttl = "12h"
failure_ttl = "2m"
"#,
        )
        .unwrap();
        assert!(cfg.robots.respect);
        assert_eq!(
            cfg.robots.ignore_domains,
            vec!["foo.example.com".to_string(), "bar.example.org".to_string()]
        );
        assert_eq!(cfg.robots.default_ttl, Duration::from_secs(12 * 3600));
        assert_eq!(cfg.robots.failure_ttl, Duration::from_secs(120));
    }

    #[test]
    fn load_rejects_zero_rpm() {
        let result = load_toml(
            r#"
[rate_limit]
requests_per_minute_per_domain = 0
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn load_rejects_rpm_above_sanity_cap() {
        let result = load_toml(
            r#"
[rate_limit]
requests_per_minute_per_domain = 100000
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn load_rejects_zero_per_domain_concurrency() {
        let result = load_toml(
            r#"
[rate_limit]
per_domain_concurrency = 0
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn load_rejects_zero_global_concurrency() {
        let result = load_toml(
            r#"
[rate_limit]
global_concurrency = 0
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn load_rejects_max_retries_above_10() {
        let result = load_toml(
            r#"
[rate_limit]
max_retries = 11
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn load_rejects_backoff_inversion() {
        let result = load_toml(
            r#"
[rate_limit]
initial_backoff = "10s"
max_backoff = "5s"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn load_rejects_zero_retry_after_ceiling() {
        let result = load_toml(
            r#"
[rate_limit]
retry_after_ceiling = "0s"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    #[test]
    fn load_rejects_failure_ttl_above_default_ttl() {
        let result = load_toml(
            r#"
[robots]
default_ttl = "1m"
failure_ttl = "10m"
"#,
        );
        assert!(matches!(result, Err(ConfigError::Invalid { .. })));
    }

    // ---- summarization / backends ------------------------------------------

    #[test]
    fn summarization_section_parses_with_defaults() {
        let toml = r#"
[summarization]
"#;
        let cfg: Config = toml::from_str(toml).unwrap();
        assert_eq!(cfg.summarization.default_backend, "default");
        assert_eq!(cfg.summarization.default_mode, "abstractive");
        assert_eq!(cfg.summarization.default_style, "prose");
        assert!(cfg.summarization.fallback_to_extractive);
        assert_eq!(cfg.summarization.tables.target_tokens, 150);
        assert!(cfg.summarization.tables.focus.contains("Describe"));
    }

    #[test]
    fn summarization_tables_block_overrides_defaults() {
        let toml = r#"
[summarization.tables]
target_tokens = 250
focus = "Custom table focus prompt."
"#;
        let cfg: Config = toml::from_str(toml).unwrap();
        assert_eq!(cfg.summarization.tables.target_tokens, 250);
        assert_eq!(cfg.summarization.tables.focus, "Custom table focus prompt.");
        // Sibling defaults remain in force.
        assert_eq!(cfg.summarization.default_backend, "default");
    }

    #[test]
    fn backends_section_parses_extractive_block() {
        let toml = r#"
[backends.default]
kind = "extractive"
"#;
        let cfg: Config = toml::from_str(toml).unwrap();
        assert_eq!(cfg.backends.len(), 1);
        let b = cfg.backends.get("default").unwrap();
        assert_eq!(b.kind, "extractive");
        assert!(b.provider.is_none());
    }

    #[test]
    fn backends_section_parses_cloud_block_with_all_fields() {
        let toml = r#"
[backends.lm_studio]
kind = "cloud"
provider = "openai_compat"
base_url = "http://localhost:1234/v1"
model = "qwen3.5-0.8b"
api_key_env = "LM_KEY"
"#;
        let cfg: Config = toml::from_str(toml).unwrap();
        let b = cfg.backends.get("lm_studio").unwrap();
        assert_eq!(b.kind, "cloud");
        assert_eq!(b.provider.as_deref(), Some("openai_compat"));
        assert_eq!(b.base_url.as_deref(), Some("http://localhost:1234/v1"));
        assert_eq!(b.model.as_deref(), Some("qwen3.5-0.8b"));
        assert_eq!(b.api_key_env.as_deref(), Some("LM_KEY"));
    }

    #[test]
    fn missing_summarization_section_yields_defaults() {
        let cfg: Config = toml::from_str("").unwrap();
        assert_eq!(cfg.summarization.default_backend, "default");
        assert!(cfg.backends.is_empty());
    }

    // ---- ssrf --------------------------------------------------------------

    #[test]
    fn ssrf_section_parses_with_defaults() {
        let toml = r#"
[ssrf]
"#;
        let cfg: Config = toml::from_str(toml).unwrap();
        assert_eq!(cfg.ssrf.level, "strict");
        assert_eq!(cfg.ssrf.project_root, std::path::PathBuf::from("."));
    }

    #[test]
    fn ssrf_section_accepts_each_level() {
        for level in &["strict", "loopback", "project", "lan", "none"] {
            let toml = format!("[ssrf]\nlevel = \"{level}\"\n");
            let cfg: Config = toml::from_str(&toml).unwrap();
            assert_eq!(cfg.ssrf.level, *level);
        }
    }

    #[test]
    fn ssrf_section_rejects_unknown_field() {
        let toml = r#"
[ssrf]
level = "strict"
bogus = 1
"#;
        let r: Result<Config, _> = toml::from_str(toml);
        assert!(r.is_err(), "expected deny_unknown_fields rejection");
    }

    #[test]
    fn missing_ssrf_section_yields_defaults() {
        let cfg: Config = toml::from_str("").unwrap();
        assert_eq!(cfg.ssrf.level, "strict");
    }

    // ---- debug -------------------------------------------------------------

    #[test]
    fn debug_section_parses_with_defaults() {
        let cfg: Config = toml::from_str("[debug]\n").unwrap();
        assert_eq!(cfg.debug.har_path, "");
        assert_eq!(cfg.debug.har_body_cap, 64 * 1024);
        assert_eq!(cfg.debug.log_level, "info");
    }

    #[test]
    fn debug_section_har_body_cap_accepts_humansize() {
        let cfg: Config = toml::from_str(
            r#"[debug]
har_body_cap = "1MiB"
"#,
        )
        .unwrap();
        assert_eq!(cfg.debug.har_body_cap, 1024 * 1024);
    }

    #[test]
    fn debug_section_har_body_cap_accepts_integer_bytes() {
        let cfg: Config = toml::from_str(
            r#"[debug]
har_body_cap = 8192
"#,
        )
        .unwrap();
        assert_eq!(cfg.debug.har_body_cap, 8192);
    }

    #[test]
    fn debug_section_rejects_unknown_field() {
        let r: Result<Config, _> = toml::from_str(
            r#"[debug]
har_path = ""
bogus = 1
"#,
        );
        assert!(r.is_err());
    }

    // ---- image captions / captioners / headless ----------------------------

    #[test]
    fn image_captions_defaults_match_spec() {
        let c = ImageCaptionsConfig::default();
        assert_eq!(c.max_tokens, 50);
        assert_eq!(c.max_per_page, 10);
        assert_eq!(c.min_width, 200);
        assert_eq!(c.min_height, 200);
        assert_eq!(c.max_bytes, 10 * 1024 * 1024);
        assert_eq!(c.max_concurrent, 2);
    }

    #[test]
    fn human_bytes_parses_common_forms() {
        assert_eq!(parse_human_bytes("1024").unwrap(), 1024);
        assert_eq!(parse_human_bytes("10MiB").unwrap(), 10 * 1024 * 1024);
        assert_eq!(parse_human_bytes("10MB").unwrap(), 10_000_000);
        assert_eq!(
            parse_human_bytes("1.5GiB").unwrap(),
            (1.5_f64 * 1024.0 * 1024.0 * 1024.0) as u64
        );
        assert!(parse_human_bytes("bogus").is_err());
    }

    #[test]
    fn image_captions_deserializes_from_toml() {
        let toml_str = r#"
[image_captions]
default = "openai"
max_per_page = 5
min_width = 100
min_height = 100
max_bytes = "1MiB"
"#;
        let cfg: Config = toml::from_str(toml_str).unwrap();
        assert_eq!(cfg.image_captions.default.as_deref(), Some("openai"));
        assert_eq!(cfg.image_captions.max_per_page, 5);
        assert_eq!(cfg.image_captions.max_bytes, 1024 * 1024);
        assert_eq!(cfg.image_captions.max_tokens, 50);
    }

    #[test]
    fn captioners_block_round_trips() {
        let toml_str = r#"
[captioners.openai]
kind = "cloud"
provider = "openai"
model = "gpt-4o-mini"
api_key_env = "OPENAI_API_KEY"

[captioners.local]
kind = "local"
model = "HuggingFaceTB/SmolVLM-256M-Instruct"
"#;
        let cfg: Config = toml::from_str(toml_str).unwrap();
        assert_eq!(cfg.captioners.len(), 2);
        assert_eq!(
            cfg.captioners.get("openai").unwrap().provider.as_deref(),
            Some("openai")
        );
        assert_eq!(cfg.captioners.get("local").unwrap().kind, "local");
    }

    #[test]
    fn headless_m9_keys_default_correctly() {
        let h = HeadlessConfig::default();
        assert_eq!(h.max_concurrent, 4);
        assert!(h.chrome_executable.is_empty());
    }

    // ---- prompt injection --------------------------------------------------

    #[test]
    fn prompt_injection_defaults_when_absent() {
        let cfg: Config = toml::from_str("").unwrap();
        assert_eq!(cfg.prompt_injection.level, "moderate");
        assert_eq!(cfg.prompt_injection.model, "disabled");
        assert!((cfg.prompt_injection.model_threshold - 0.9).abs() < f64::EPSILON);
        assert!(cfg.prompt_injection.allowlist.wrap.is_empty());
        assert!(cfg.prompt_injection.allowlist.patterns.is_empty());
        assert!(cfg.prompt_injection.allowlist.model.is_empty());
        assert!(!cfg.prompt_injection.agent_overrides.wrap);
        assert!(!cfg.prompt_injection.agent_overrides.patterns);
        assert!(!cfg.prompt_injection.agent_overrides.model);
        assert!(!cfg.prompt_injection.agent_overrides.level);
    }

    #[test]
    fn prompt_injection_parses_full_block() {
        let toml = r#"
[prompt_injection]
level = "strict"
model = "deberta-base"
model_threshold = 0.75

[prompt_injection.allowlist]
wrap = ["https://*.internal.example.com/*"]
patterns = ["*"]
model = []

[prompt_injection.agent_overrides]
wrap = true
patterns = false
model = true
level = true
"#;
        let cfg: Config = toml::from_str(toml).unwrap();
        assert_eq!(cfg.prompt_injection.level, "strict");
        assert_eq!(cfg.prompt_injection.model, "deberta-base");
        assert!((cfg.prompt_injection.model_threshold - 0.75).abs() < f64::EPSILON);
        assert_eq!(
            cfg.prompt_injection.allowlist.wrap,
            vec!["https://*.internal.example.com/*".to_string()]
        );
        assert_eq!(
            cfg.prompt_injection.allowlist.patterns,
            vec!["*".to_string()]
        );
        assert!(cfg.prompt_injection.agent_overrides.wrap);
        assert!(!cfg.prompt_injection.agent_overrides.patterns);
        assert!(cfg.prompt_injection.agent_overrides.model);
        assert!(cfg.prompt_injection.agent_overrides.level);
    }

    #[test]
    fn prompt_injection_rejects_unknown_field() {
        let toml = "[prompt_injection]\nbogus = 1\n";
        let r: Result<Config, _> = toml::from_str(toml);
        assert!(r.is_err(), "expected deny_unknown_fields rejection");
    }

    // ---- config path resolution --------------------------------------------

    #[test]
    fn config_candidates_prefers_rover_config_env_as_sole_candidate() {
        let c = config_candidates_from(Some("/custom/x.toml"), Some(Path::new("/cfg")));
        assert_eq!(c, vec![std::path::PathBuf::from("/custom/x.toml")]);
    }

    #[test]
    fn config_candidates_searches_platform_then_cwd() {
        let c = config_candidates_from(None, Some(Path::new("/cfg")));
        assert_eq!(
            c,
            vec![
                std::path::PathBuf::from("/cfg/rover/rover.toml"),
                std::path::PathBuf::from("rover.toml"),
            ]
        );
    }

    #[test]
    fn config_candidates_falls_back_to_cwd_rover_toml() {
        let c = config_candidates_from(None, None);
        assert_eq!(c, vec![std::path::PathBuf::from("rover.toml")]);
    }

    #[test]
    fn resolve_existing_prefers_platform_over_cwd_candidate() {
        // Lay down <tmp>/rover/rover.toml and confirm it is the chosen file.
        let tmp = tempfile::tempdir().unwrap();
        let rover_dir = tmp.path().join("rover");
        std::fs::create_dir_all(&rover_dir).unwrap();
        let platform_file = rover_dir.join("rover.toml");
        std::fs::write(&platform_file, "[fetch]\ntimeout_secs = 3\n").unwrap();

        let resolved = config_candidates_from(None, Some(tmp.path()))
            .into_iter()
            .find(|p| p.is_file());
        assert_eq!(resolved, Some(platform_file));
    }

    #[test]
    fn resolve_existing_is_none_when_no_candidate_exists() {
        let tmp = tempfile::tempdir().unwrap();
        // tmp has no rover/rover.toml, and the crate root has no ./rover.toml.
        // NOTE: the second candidate is relative, so this test depends on the
        // working directory the test runner uses.
        let resolved = config_candidates_from(None, Some(tmp.path()))
            .into_iter()
            .find(|p| p.is_file());
        assert_eq!(resolved, None);
    }

    // ---- load_resolved -----------------------------------------------------

    #[test]
    fn load_resolved_uses_explicit_path_when_present() {
        let explicit = temp_config("[fetch]\ntimeout_secs = 7\n");
        // A resolved default must be ignored when --config is supplied, so
        // provide a competing default with a different, distinguishable value.
        let resolved_default = temp_config("[fetch]\ntimeout_secs = 9\n");
        let cfg = load_resolved_from(Some(explicit.path()), Some(resolved_default.path())).unwrap();
        assert_eq!(cfg.fetch.timeout_secs, 7);

        // The explicit path also works when nothing resolves by default.
        let cfg = load_resolved_from(Some(explicit.path()), None).unwrap();
        assert_eq!(cfg.fetch.timeout_secs, 7);
    }

    #[test]
    fn load_resolved_errors_when_explicit_path_missing() {
        // An explicit --config typo must fail loudly, NOT fall back to the
        // resolved default or to built-in defaults.
        let default_file = temp_config("[fetch]\ntimeout_secs = 9\n");
        let result = load_resolved_from(
            Some(Path::new("/no/such/__rover_explicit__.toml")),
            Some(default_file.path()),
        );
        assert!(matches!(result, Err(ConfigError::Read { .. })));
    }

    #[test]
    fn load_resolved_loads_resolved_default_when_no_explicit() {
        let file = temp_config("[fetch]\ntimeout_secs = 11\n");
        let cfg = load_resolved_from(None, Some(file.path())).unwrap();
        assert_eq!(cfg.fetch.timeout_secs, 11);
    }

    #[test]
    fn load_resolved_falls_back_to_defaults_when_nothing_resolves() {
        let cfg = load_resolved_from(None, None).unwrap();
        assert_eq!(cfg.fetch.timeout_secs, default_timeout_secs());
    }
}