blz_core/config.rs
1//! Configuration management for blz cache system.
2//!
3//! This module provides hierarchical configuration with global defaults and per-source overrides.
4//! Configuration is stored in TOML format and supports environment variable overrides.
5//!
6//! ## Configuration Hierarchy
7//!
//! 1. **Global config**: Platform-specific config directory (see the `Config` docs)
//! 2. **Per-source config**: `<source_dir>/settings.toml`
//! 3. **Environment variables**: `BLZ_*` prefix (e.g. `BLZ_REFRESH_HOURS`, `BLZ_ROOT`)
11//!
12//! ## Examples
13//!
14//! ### Loading global configuration:
15//!
16//! ```rust
17//! use blz_core::{Config, Result};
18//!
19//! // Load from default location or create with defaults
20//! let config = Config::load()?;
21//! println!("Cache root: {}", config.paths.root.display());
22//! println!("Refresh interval: {} hours", config.defaults.refresh_hours);
23//! # Ok::<(), blz_core::Error>(())
24//! ```
25//!
26//! ### Working with tool-specific configuration:
27//!
28//! ```rust,no_run
29//! use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
30//! use std::path::Path;
31//!
32//! let tool_config = ToolConfig {
33//! meta: ToolMeta {
34//! name: "react".to_string(),
35//! display_name: Some("React Documentation".to_string()),
36//! homepage: Some("https://react.dev".to_string()),
37//! repo: Some("https://github.com/facebook/react".to_string()),
38//! },
39//! fetch: FetchConfig {
40//! refresh_hours: Some(12), // Override global default
41//! follow_links: None, // Use global default
42//! allowlist: None, // Use global default
43//! },
44//! index: IndexConfig {
45//! max_heading_block_lines: Some(500),
46//! },
47//! };
48//!
49//! // Save to file
50//! tool_config.save(Path::new("react/settings.toml"))?;
51//! # Ok::<(), blz_core::Error>(())
52//! ```
53
54use crate::{Error, Result};
55use serde::{Deserialize, Serialize};
56use std::fs;
57use std::path::{Path, PathBuf};
58
59/// Global configuration for the blz cache system.
60///
61/// Contains default settings that apply to all sources unless overridden by per-source configuration.
62/// Configuration is automatically loaded from the system config directory or created with sensible defaults.
63///
64/// ## File Location
65///
66/// The configuration file is stored at (searched in order):
67/// - XDG: `$XDG_CONFIG_HOME/blz/config.toml` or `~/.config/blz/config.toml`
68/// - Dotfile fallback: `~/.blz/config.toml`
69///
70/// A `config.local.toml` in the same directory overrides keys from `config.toml`.
71///
72/// ## Example Configuration File
73///
74/// ```toml
75/// [defaults]
76/// refresh_hours = 24
77/// max_archives = 10
78/// fetch_enabled = true
79/// follow_links = "first_party"
80/// allowlist = ["docs.rs", "developer.mozilla.org"]
81///
82/// [paths]
83/// root = "/home/user/.outfitter/blz"
84/// ```
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct Config {
87 /// Default settings for all sources
88 pub defaults: DefaultsConfig,
89 /// File system paths configuration
90 pub paths: PathsConfig,
91}
92
93/// Default settings that apply to all sources unless overridden.
94///
95/// These settings control fetching behavior, caching policies, and link following rules.
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct DefaultsConfig {
98 /// How often to refresh cached content (in hours).
99 ///
100 /// Sources are only re-fetched if they haven't been updated within this interval.
101 /// Set to 0 to always fetch on access.
102 pub refresh_hours: u32,
103
104 /// Maximum number of archived versions to keep per source.
105 ///
106 /// When a source is updated, the previous version is archived. This setting
107 /// controls how many historical versions to retain for diff generation.
108 pub max_archives: usize,
109
110 /// Whether fetching from remote sources is enabled.
111 ///
112 /// When disabled, only locally cached content is used. Useful for offline work
113 /// or environments with restricted network access.
114 pub fetch_enabled: bool,
115
116 /// Policy for following links in llms.txt files.
117 ///
118 /// Controls whether and which external links should be followed when processing
119 /// llms.txt files that contain references to other documentation sources.
120 pub follow_links: FollowLinks,
121
122 /// Domains allowed for link following.
123 ///
124 /// Only used when `follow_links` is set to `Allowlist`. Links to domains
125 /// not in this list will be ignored.
126 pub allowlist: Vec<String>,
127
128 /// Prefer upgrading/using llms-full.txt when available.
129 /// When true, update operations default to choosing llms-full.txt where available.
130 #[serde(default)]
131 pub prefer_llms_full: bool,
132}
133
134/// Policy for following external links in llms.txt files.
135///
136/// This controls how the system handles links to other documentation sources
137/// within llms.txt files.
138#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
139#[serde(rename_all = "snake_case")]
140pub enum FollowLinks {
141 /// Never follow external links.
142 ///
143 /// Only process the original llms.txt file, ignoring any links to other sources.
144 None,
145
146 /// Follow links to the same domain and its immediate subdomains.
147 ///
148 /// For example, if processing `docs.example.com/llms.txt`, links to
149 /// `api.example.com/docs` or `example.com/guide` would be followed,
150 /// but `other-site.com/docs` would be ignored.
151 FirstParty,
152
153 /// Only follow links to domains in the allowlist.
154 ///
155 /// Use the `allowlist` field in `DefaultsConfig` to specify which domains
156 /// are permitted. This provides fine-grained control over which external
157 /// sources are trusted.
158 Allowlist,
159}
160
161/// File system paths configuration.
162///
163/// Defines where cached content, indices, and metadata are stored on the local filesystem.
164#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct PathsConfig {
166 /// Root directory for all cached content.
167 ///
168 /// Each source gets its own subdirectory under this root. The directory
169 /// structure is: `root/<source_alias>/`
170 ///
171 /// Default locations:
172 /// - Linux: `~/.local/share/blz`
173 /// - macOS: `~/Library/Application Support/dev.outfitter.blz`
174 /// - Windows: `%APPDATA%\outfitter\blz`
175 pub root: PathBuf,
176}
177
178impl Config {
179 /// Load configuration from the default location or create with defaults.
180 ///
181 /// This method attempts to load the configuration from the system config directory.
182 /// If the file doesn't exist, it returns a configuration with sensible defaults.
183 /// If the file exists but is malformed, it returns an error.
184 ///
185 /// # Returns
186 ///
187 /// Returns the loaded configuration or a default configuration if no file exists.
188 ///
189 /// # Errors
190 ///
191 /// Returns an error if:
192 /// - The config directory cannot be determined (unsupported platform)
193 /// - The config file exists but cannot be read (permissions, I/O error)
194 /// - The config file exists but contains invalid TOML syntax
195 /// - The config file exists but contains invalid configuration values
196 ///
197 /// # Examples
198 ///
199 /// ```rust
200 /// use blz_core::Config;
201 ///
202 /// // Load existing config or create with defaults
203 /// let config = Config::load()?;
204 ///
205 /// if config.defaults.fetch_enabled {
206 /// println!("Fetching is enabled");
207 /// }
208 /// # Ok::<(), blz_core::Error>(())
209 /// ```
210 pub fn load() -> Result<Self> {
211 // Determine base config path (BLZ_CONFIG/BLZ_CONFIG_DIR, XDG, dotfile), or use defaults
212 let base_path = Self::existing_config_path()?;
213
214 // Load base
215 let mut base_value: toml::Value = if let Some(ref path) = base_path {
216 let content = fs::read_to_string(path)
217 .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
218 toml::from_str(&content)
219 .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))?
220 } else {
221 let default_str = toml::to_string(&Self::default())
222 .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?;
223 toml::from_str(&default_str)
224 .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?
225 };
226
227 // Merge optional local override next to resolved base directory
228 let base_dir = base_path.as_deref().map_or_else(
229 || {
230 Self::canonical_config_path().map_or_else(
231 |_| PathBuf::new(),
232 |p| p.parent().map(Path::to_path_buf).unwrap_or_default(),
233 )
234 },
235 |bp| bp.parent().map(Path::to_path_buf).unwrap_or_default(),
236 );
237
238 let local_path = base_dir.join("config.local.toml");
239 if local_path.exists() {
240 let content = fs::read_to_string(&local_path)
241 .map_err(|e| Error::Config(format!("Failed to read local config: {e}")))?;
242 let local_value: toml::Value = toml::from_str(&content)
243 .map_err(|e| Error::Config(format!("Failed to parse local config: {e}")))?;
244 Self::merge_toml(&mut base_value, &local_value);
245 }
246
247 // Deserialize
248 let mut config: Self = base_value
249 .try_into()
250 .map_err(|e| Error::Config(format!("Failed to materialize config: {e}")))?;
251
252 // Apply env overrides
253 config.apply_env_overrides();
254
255 Ok(config)
256 }
257
258 /// Save the configuration to the default location.
259 ///
260 /// This method serializes the configuration to TOML format and writes it to
261 /// the system config directory. Parent directories are created if they don't exist.
262 ///
263 /// # Errors
264 ///
265 /// Returns an error if:
266 /// - The config directory cannot be determined (unsupported platform)
267 /// - Parent directories cannot be created (permissions, disk space)
268 /// - The configuration cannot be serialized to TOML
269 /// - The file cannot be written (permissions, disk space, I/O error)
270 ///
271 /// # Examples
272 ///
273 /// ```rust,no_run
274 /// use blz_core::{Config, DefaultsConfig, PathsConfig, FollowLinks};
275 /// use std::path::PathBuf;
276 ///
277 /// let mut config = Config::load()?;
278 /// config.defaults.refresh_hours = 12; // Update refresh interval
279 /// config.save()?; // Persist changes
280 /// # Ok::<(), blz_core::Error>(())
281 /// ```
282 pub fn save(&self) -> Result<()> {
283 let config_path = Self::save_target_path()?;
284 let parent = config_path
285 .parent()
286 .ok_or_else(|| Error::Config("Invalid config path".into()))?;
287
288 fs::create_dir_all(parent)
289 .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
290
291 let content = toml::to_string_pretty(self)
292 .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
293
294 let tmp = parent.join("config.toml.tmp");
295 fs::write(&tmp, &content)
296 .map_err(|e| Error::Config(format!("Failed to write temp config: {e}")))?;
297 // Best-effort atomic replace; on Windows, rename() replaces if target does not exist.
298 // SAFETY: config.toml write is replaced in one step to avoid torn files.
299 #[cfg(target_os = "windows")]
300 if config_path.exists() {
301 fs::remove_file(&config_path)
302 .map_err(|e| Error::Config(format!("Failed to remove existing config: {e}")))?;
303 }
304 std::fs::rename(&tmp, &config_path)
305 .map_err(|e| Error::Config(format!("Failed to replace config: {e}")))?;
306
307 Ok(())
308 }
309
310 /// Get the path where the global configuration file is stored.
311 ///
312 /// Uses the system-appropriate config directory based on the platform:
313 /// - Linux: `~/.config/blz/global.toml`
314 /// - macOS: `~/Library/Application Support/dev.outfitter.blz/global.toml`
315 /// - Windows: `%APPDATA%\outfitter\blz\global.toml`
316 ///
317 /// # Errors
318 ///
319 /// Returns an error if the system config directory cannot be determined,
320 /// which may happen on unsupported platforms or in sandboxed environments.
321 fn canonical_config_path() -> Result<PathBuf> {
322 let xdg = std::env::var("XDG_CONFIG_HOME")
323 .ok()
324 .map(PathBuf::from)
325 .or_else(|| directories::BaseDirs::new().map(|b| b.home_dir().join(".config")))
326 .ok_or_else(|| Error::Config("Failed to determine XDG config directory".into()))?;
327 Ok(xdg.join("blz").join("config.toml"))
328 }
329
330 fn dotfile_config_path() -> Result<PathBuf> {
331 let home = directories::BaseDirs::new()
332 .map(|b| b.home_dir().to_path_buf())
333 .ok_or_else(|| Error::Config("Failed to determine home directory".into()))?;
334 Ok(home.join(".blz").join("config.toml"))
335 }
336
337 fn existing_config_path() -> Result<Option<PathBuf>> {
338 // 1) BLZ_CONFIG (file)
339 if let Ok(explicit) = std::env::var("BLZ_CONFIG") {
340 let explicit = explicit.trim();
341 if !explicit.is_empty() {
342 let p = PathBuf::from(explicit);
343 if p.is_file() && p.exists() {
344 return Ok(Some(p));
345 }
346 }
347 }
348
349 // 2) BLZ_CONFIG_DIR (dir)
350 if let Ok(dir) = std::env::var("BLZ_CONFIG_DIR") {
351 let dir = dir.trim();
352 if !dir.is_empty() {
353 let p = PathBuf::from(dir).join("config.toml");
354 if p.is_file() && p.exists() {
355 return Ok(Some(p));
356 }
357 }
358 }
359
360 // 3) XDG
361 let xdg = Self::canonical_config_path()?;
362 if xdg.exists() {
363 return Ok(Some(xdg));
364 }
365 // 4) Dotfile
366 let dot = Self::dotfile_config_path()?;
367 if dot.exists() {
368 return Ok(Some(dot));
369 }
370 Ok(None)
371 }
372
373 fn save_target_path() -> Result<PathBuf> {
374 if let Some(existing) = Self::existing_config_path()? {
375 return Ok(existing);
376 }
377 Self::canonical_config_path()
378 }
379
380 fn merge_toml(dst: &mut toml::Value, src: &toml::Value) {
381 use toml::Value::Table;
382 match (dst, src) {
383 (Table(dst_tbl), Table(src_tbl)) => {
384 for (k, v) in src_tbl {
385 match dst_tbl.get_mut(k) {
386 Some(dst_v) => Self::merge_toml(dst_v, v),
387 None => {
388 dst_tbl.insert(k.clone(), v.clone());
389 },
390 }
391 }
392 },
393 (dst_v, src_v) => *dst_v = src_v.clone(),
394 }
395 }
396
397 fn apply_env_overrides(&mut self) {
398 if let Ok(v) = std::env::var("BLZ_REFRESH_HOURS") {
399 if let Ok(n) = v.parse::<u32>() {
400 self.defaults.refresh_hours = n;
401 }
402 }
403 if let Ok(v) = std::env::var("BLZ_MAX_ARCHIVES") {
404 if let Ok(n) = v.parse::<usize>() {
405 self.defaults.max_archives = n;
406 }
407 }
408 if let Ok(v) = std::env::var("BLZ_FETCH_ENABLED") {
409 let norm = v.to_ascii_lowercase();
410 self.defaults.fetch_enabled = matches!(norm.as_str(), "1" | "true" | "yes" | "on");
411 }
412 if let Ok(v) = std::env::var("BLZ_FOLLOW_LINKS") {
413 match v.to_ascii_lowercase().as_str() {
414 "none" => self.defaults.follow_links = FollowLinks::None,
415 "first_party" | "firstparty" => {
416 self.defaults.follow_links = FollowLinks::FirstParty;
417 },
418 "allowlist" => self.defaults.follow_links = FollowLinks::Allowlist,
419 _ => {},
420 }
421 }
422 if let Ok(v) = std::env::var("BLZ_ALLOWLIST") {
423 let list = v
424 .split(',')
425 .map(|s| s.trim().to_string())
426 .filter(|s| !s.is_empty())
427 .collect::<Vec<_>>();
428 if !list.is_empty() {
429 self.defaults.allowlist = list;
430 }
431 }
432 if let Ok(v) = std::env::var("BLZ_ROOT") {
433 let p = PathBuf::from(v);
434 if !p.as_os_str().is_empty() {
435 self.paths.root = p;
436 }
437 }
438 if let Ok(v) = std::env::var("BLZ_PREFER_LLMS_FULL") {
439 let s = v.trim().to_ascii_lowercase();
440 self.defaults.prefer_llms_full = matches!(s.as_str(), "1" | "true" | "yes" | "on");
441 }
442 }
443}
444
445impl Default for Config {
446 fn default() -> Self {
447 Self {
448 defaults: DefaultsConfig {
449 refresh_hours: 24,
450 max_archives: 10,
451 fetch_enabled: true,
452 follow_links: FollowLinks::FirstParty,
453 allowlist: Vec::new(),
454 prefer_llms_full: false,
455 },
456 paths: PathsConfig {
457 root: directories::ProjectDirs::from("dev", "outfitter", "blz").map_or_else(
458 || {
459 // Expand home directory properly
460 directories::BaseDirs::new().map_or_else(
461 || PathBuf::from(".outfitter/blz"),
462 |base| base.home_dir().join(".outfitter").join("blz"),
463 )
464 },
465 |dirs| dirs.data_dir().to_path_buf(),
466 ),
467 },
468 }
469 }
470}
471
472/// Per-source configuration that overrides global defaults.
473///
474/// Each documentation source can have its own configuration file (`settings.toml`)
475/// that overrides the global configuration for that specific source. This allows
476/// fine-grained control over fetching behavior, indexing parameters, and metadata.
477///
478/// ## File Location
479///
480/// Stored as `<cache_root>/<source_alias>/settings.toml`
481///
482/// ## Example Configuration File
483///
484/// ```toml
485/// [meta]
486/// name = "react"
487/// display_name = "React Documentation"
488/// homepage = "https://react.dev"
489/// repo = "https://github.com/facebook/react"
490///
491/// [fetch]
492/// refresh_hours = 12 # Override global default
493/// follow_links = "first_party"
494/// allowlist = ["reactjs.org", "react.dev"]
495///
496/// [index]
497/// max_heading_block_lines = 500
498/// ```
499#[derive(Debug, Clone, Serialize, Deserialize)]
500pub struct ToolConfig {
501 /// Metadata about the documentation source
502 pub meta: ToolMeta,
503 /// Fetching behavior overrides
504 pub fetch: FetchConfig,
505 /// Indexing parameter overrides
506 pub index: IndexConfig,
507}
508
509/// Metadata about a documentation source.
510///
511/// This information is used for display purposes and to provide context
512/// about the source of documentation being cached.
513#[derive(Debug, Clone, Serialize, Deserialize)]
514pub struct ToolMeta {
515 /// Unique identifier for this source (used as directory name).
516 ///
517 /// Should be a valid filename that uniquely identifies the source.
518 /// Typically lowercase with hyphens (e.g., "react", "node-js", "rust-std").
519 pub name: String,
520
521 /// Human-readable display name for the source.
522 ///
523 /// Used in search results and UI displays. If not provided, the `name`
524 /// field is used as fallback.
525 pub display_name: Option<String>,
526
527 /// Homepage URL for the documentation source.
528 ///
529 /// The main website or documentation portal for this source.
530 /// Used for reference and linking back to the original documentation.
531 pub homepage: Option<String>,
532
533 /// Repository URL for the documentation source.
534 ///
535 /// Link to the source code repository, if available. Useful for
536 /// understanding the project context and accessing source code.
537 pub repo: Option<String>,
538}
539
540/// Per-source fetching behavior overrides.
541///
542/// These settings override the global defaults for fetching behavior.
543/// Any `None` values will use the corresponding global default setting.
544#[derive(Debug, Clone, Serialize, Deserialize)]
545pub struct FetchConfig {
546 /// Override for refresh interval in hours.
547 ///
548 /// If `Some`, overrides the global `refresh_hours` setting for this source.
549 /// If `None`, uses the global default.
550 pub refresh_hours: Option<u32>,
551
552 /// Override for link following policy.
553 ///
554 /// If `Some`, overrides the global `follow_links` setting for this source.
555 /// If `None`, uses the global default.
556 pub follow_links: Option<FollowLinks>,
557
558 /// Override for allowed domains list.
559 ///
560 /// If `Some`, overrides the global `allowlist` setting for this source.
561 /// If `None`, uses the global default. Only used when `follow_links` is `Allowlist`.
562 pub allowlist: Option<Vec<String>>,
563}
564
565/// Per-source indexing parameter overrides.
566///
567/// These settings control how the documentation is processed and indexed
568/// for this specific source, overriding global defaults where specified.
569#[derive(Debug, Clone, Serialize, Deserialize)]
570pub struct IndexConfig {
571 /// Maximum lines to include in a single heading block.
572 ///
573 /// Controls how large sections are broken up during indexing. Larger values
574 /// include more context but may reduce search precision. Smaller values
575 /// provide more focused results but may split related content.
576 ///
577 /// If `None`, uses a sensible default based on content analysis.
578 pub max_heading_block_lines: Option<usize>,
579}
580
581impl ToolConfig {
582 /// Load per-source configuration from a file.
583 ///
584 /// Loads and parses a TOML configuration file for a specific documentation source.
585 /// The file should contain sections for `[meta]`, `[fetch]`, and `[index]`.
586 ///
587 /// # Arguments
588 ///
589 /// * `path` - Path to the configuration file (typically `settings.toml`)
590 ///
591 /// # Returns
592 ///
593 /// Returns the parsed configuration.
594 ///
595 /// # Errors
596 ///
597 /// Returns an error if:
598 /// - The file cannot be read (doesn't exist, permissions, I/O error)
599 /// - The file contains invalid TOML syntax
600 /// - The file contains invalid configuration values
601 /// - Required fields are missing (e.g., `meta.name`)
602 ///
603 /// # Examples
604 ///
605 /// ```rust,no_run
606 /// use blz_core::ToolConfig;
607 /// use std::path::Path;
608 ///
609 /// // Load source-specific configuration
610 /// let config_path = Path::new("sources/react/settings.toml");
611 /// let tool_config = ToolConfig::load(config_path)?;
612 ///
613 /// println!("Source: {}", tool_config.meta.name);
614 /// if let Some(refresh) = tool_config.fetch.refresh_hours {
615 /// println!("Custom refresh interval: {} hours", refresh);
616 /// }
617 /// # Ok::<(), blz_core::Error>(())
618 /// ```
619 pub fn load(path: &Path) -> Result<Self> {
620 let content = fs::read_to_string(path)
621 .map_err(|e| Error::Config(format!("Failed to read tool config: {e}")))?;
622 toml::from_str(&content)
623 .map_err(|e| Error::Config(format!("Failed to parse tool config: {e}")))
624 }
625
626 /// Save per-source configuration to a file.
627 ///
628 /// Serializes the configuration to TOML format and writes it to the specified path.
629 /// The parent directory must already exist.
630 ///
631 /// # Arguments
632 ///
633 /// * `path` - Path where to save the configuration file
634 ///
635 /// # Errors
636 ///
637 /// Returns an error if:
638 /// - The configuration cannot be serialized to TOML
639 /// - The parent directory doesn't exist
640 /// - The file cannot be written (permissions, disk space, I/O error)
641 ///
642 /// # Examples
643 ///
644 /// ```rust,no_run
645 /// use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
646 /// use std::path::Path;
647 ///
648 /// let config = ToolConfig {
649 /// meta: ToolMeta {
650 /// name: "my-docs".to_string(),
651 /// display_name: Some("My Documentation".to_string()),
652 /// homepage: None,
653 /// repo: None,
654 /// },
655 /// fetch: FetchConfig {
656 /// refresh_hours: Some(6),
657 /// follow_links: None,
658 /// allowlist: None,
659 /// },
660 /// index: IndexConfig {
661 /// max_heading_block_lines: Some(300),
662 /// },
663 /// };
664 ///
665 /// let config_path = Path::new("my-docs/settings.toml");
666 /// config.save(config_path)?;
667 /// # Ok::<(), blz_core::Error>(())
668 /// ```
669 pub fn save(&self, path: &Path) -> Result<()> {
670 let content = toml::to_string_pretty(self)
671 .map_err(|e| Error::Config(format!("Failed to serialize tool config: {e}")))?;
672 fs::write(path, content)
673 .map_err(|e| Error::Config(format!("Failed to write tool config: {e}")))?;
674 Ok(())
675 }
676}
677
678#[cfg(test)]
679#[allow(
680 clippy::panic,
681 clippy::disallowed_macros,
682 clippy::unwrap_used,
683 clippy::unnecessary_wraps
684)]
685mod tests {
686 use super::*;
687 use proptest::prelude::*;
688 use std::fs;
689 use tempfile::TempDir;
690
691 // Test fixtures
692 fn create_test_config() -> Config {
693 Config {
694 defaults: DefaultsConfig {
695 refresh_hours: 12,
696 max_archives: 5,
697 fetch_enabled: true,
698 follow_links: FollowLinks::Allowlist,
699 allowlist: vec!["example.com".to_string(), "docs.rs".to_string()],
700 prefer_llms_full: false,
701 },
702 paths: PathsConfig {
703 root: PathBuf::from("/tmp/test"),
704 },
705 }
706 }
707
708 fn create_test_tool_config() -> ToolConfig {
709 ToolConfig {
710 meta: ToolMeta {
711 name: "test-tool".to_string(),
712 display_name: Some("Test Tool".to_string()),
713 homepage: Some("https://test.com".to_string()),
714 repo: Some("https://github.com/test/tool".to_string()),
715 },
716 fetch: FetchConfig {
717 refresh_hours: Some(6),
718 follow_links: Some(FollowLinks::FirstParty),
719 allowlist: Some(vec!["allowed.com".to_string()]),
720 },
721 index: IndexConfig {
722 max_heading_block_lines: Some(100),
723 },
724 }
725 }
726
/// The built-in defaults must match the documented values.
#[test]
fn test_default_config_values() {
    // Take the default configuration apart so each section can be checked directly.
    let Config { defaults, paths } = Config::default();

    assert_eq!(defaults.refresh_hours, 24);
    assert_eq!(defaults.max_archives, 10);
    assert!(defaults.fetch_enabled);
    assert_eq!(defaults.follow_links, FollowLinks::FirstParty);
    assert!(defaults.allowlist.is_empty());

    // Whatever platform we run on, some cache root must have been chosen.
    assert!(!paths.root.as_os_str().is_empty());
}
744
/// Every `FollowLinks` variant must survive a JSON serialize/deserialize cycle.
#[test]
fn test_follow_links_serialization() -> Result<()> {
    for variant in [
        FollowLinks::None,
        FollowLinks::FirstParty,
        FollowLinks::Allowlist,
    ] {
        let serialized = serde_json::to_string(&variant)?;
        let deserialized: FollowLinks = serde_json::from_str(&serialized)?;

        assert_eq!(variant, deserialized, "Round-trip failed for {variant:?}");
    }

    Ok(())
}
764
/// A `Config` written to disk as TOML must read back with the same values.
#[test]
fn test_config_save_and_load_roundtrip() -> Result<()> {
    // Arrange: scratch directory plus a fixture configuration.
    let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
    let config_path = temp_dir.path().join("test_config.toml");
    let original_config = create_test_config();

    // Act: write the fixture out ...
    let serialized = toml::to_string_pretty(&original_config)
        .map_err(|e| Error::Config(format!("Failed to serialize: {e}")))?;
    fs::write(&config_path, serialized)
        .map_err(|e| Error::Config(format!("Failed to write: {e}")))?;

    // ... and read it back in.
    let raw = fs::read_to_string(&config_path)
        .map_err(|e| Error::Config(format!("Failed to read: {e}")))?;
    let loaded_config: Config =
        toml::from_str(&raw).map_err(|e| Error::Config(format!("Failed to parse: {e}")))?;

    // Assert: field-by-field equality.
    let (loaded, expected) = (&loaded_config.defaults, &original_config.defaults);
    assert_eq!(loaded.refresh_hours, expected.refresh_hours);
    assert_eq!(loaded.max_archives, expected.max_archives);
    assert_eq!(loaded.fetch_enabled, expected.fetch_enabled);
    assert_eq!(loaded.allowlist, expected.allowlist);
    assert_eq!(loaded_config.paths.root, original_config.paths.root);

    Ok(())
}
805
/// Reading a config from a path that does not exist must surface a read error.
#[test]
fn test_config_load_missing_file() {
    let non_existent = PathBuf::from("/definitely/does/not/exist/config.toml");

    // Mirror the read-then-parse pipeline used when loading a config file.
    let result: Result<Config> = fs::read_to_string(&non_existent)
        .map_err(|e| Error::Config(format!("Failed to read config: {e}")))
        .and_then(|content| {
            toml::from_str(&content)
                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
        });

    assert!(result.is_err());
    match result {
        Err(Error::Config(msg)) => assert!(msg.contains("Failed to read config")),
        _ => unreachable!("Expected Config error"),
    }
}
826
/// Malformed TOML must be reported as a parse error rather than a read error.
#[test]
fn test_config_parse_invalid_toml() {
    // A readable file whose content is not TOML.
    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let config_path = temp_dir.path().join("invalid.toml");
    fs::write(&config_path, "this is not valid toml [[[").expect("Failed to write test file");

    // Mirror the read-then-parse pipeline used when loading a config file.
    let result: Result<Config> = fs::read_to_string(&config_path)
        .map_err(|e| Error::Config(format!("Failed to read config: {e}")))
        .and_then(|content| {
            toml::from_str(&content)
                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
        });

    assert!(result.is_err());
    match result {
        Err(Error::Config(msg)) => assert!(msg.contains("Failed to parse config")),
        _ => panic!("Expected Config parse error"),
    }
}
850
/// Writing a config into a not-yet-existing nested directory must create the
/// directories first (this replays the steps of `Config::save` against a temp path).
#[test]
fn test_config_save_creates_directory() -> Result<()> {
    let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
    let mut nested_path = temp_dir.path().to_path_buf();
    nested_path.extend(["nested", "deeper", "config.toml"]);
    let config = create_test_config();

    // Create the missing parents, then write the serialized config.
    let parent = nested_path
        .parent()
        .ok_or_else(|| Error::Config("Invalid config path".into()))?;
    fs::create_dir_all(parent)
        .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;

    let content = toml::to_string_pretty(&config)
        .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
    fs::write(&nested_path, content)
        .map_err(|e| Error::Config(format!("Failed to write config: {e}")))?;

    // Both the file and its directory must now be present.
    assert!(nested_path.exists());
    assert!(parent.exists());

    Ok(())
}
885
/// `ToolConfig::save` followed by `ToolConfig::load` must reproduce the values.
#[test]
fn test_tool_config_roundtrip() -> Result<()> {
    let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
    let config_path = temp_dir.path().join("tool.toml");
    let original_config = create_test_tool_config();

    original_config.save(&config_path)?;
    let ToolConfig { meta, fetch, index } = ToolConfig::load(&config_path)?;

    // Metadata
    assert_eq!(meta.name, original_config.meta.name);
    assert_eq!(meta.display_name, original_config.meta.display_name);
    assert_eq!(meta.homepage, original_config.meta.homepage);
    assert_eq!(meta.repo, original_config.meta.repo);

    // Fetch overrides
    assert_eq!(fetch.refresh_hours, original_config.fetch.refresh_hours);
    assert_eq!(fetch.allowlist, original_config.fetch.allowlist);

    // Index overrides
    assert_eq!(
        index.max_heading_block_lines,
        original_config.index.max_heading_block_lines
    );

    Ok(())
}
920
/// Loading a tool config from a missing path must fail with a read error.
#[test]
fn test_tool_config_load_nonexistent_file() {
    let non_existent = PathBuf::from("/does/not/exist/tool.toml");

    let result = ToolConfig::load(&non_existent);

    assert!(result.is_err());
    match result {
        Err(Error::Config(msg)) => assert!(msg.contains("Failed to read tool config")),
        _ => panic!("Expected Config error"),
    }
}
937
/// Very large numbers and very long strings must survive a TOML round trip.
#[test]
fn test_config_with_extreme_values() -> Result<()> {
    // Large, but deliberately below the integer maximums to stay clear of TOML limits.
    let defaults = DefaultsConfig {
        refresh_hours: 1_000_000,
        max_archives: 1_000_000,
        fetch_enabled: false,
        follow_links: FollowLinks::None,
        allowlist: vec!["a".repeat(1000)], // one very long domain
        prefer_llms_full: false,
    };
    let paths = PathsConfig {
        root: PathBuf::from("/".repeat(100)), // very long path
    };
    let extreme_config = Config { defaults, paths };

    let serialized = toml::to_string_pretty(&extreme_config)
        .map_err(|e| Error::Config(format!("Serialize failed: {e}")))?;
    let deserialized: Config = toml::from_str(&serialized)
        .map_err(|e| Error::Config(format!("Deserialize failed: {e}")))?;

    let restored = &deserialized.defaults;
    assert_eq!(restored.refresh_hours, 1_000_000);
    assert_eq!(restored.max_archives, 1_000_000);
    assert!(!restored.fetch_enabled);
    assert_eq!(restored.allowlist.len(), 1);
    assert_eq!(restored.allowlist[0].len(), 1000);

    Ok(())
}
970
/// An empty allowlist combined with `FollowLinks::Allowlist` must survive a TOML round trip.
#[test]
fn test_config_empty_allowlist() -> Result<()> {
    // Given: Allowlist-based link following with no domains listed
    let defaults = DefaultsConfig {
        refresh_hours: 24,
        max_archives: 10,
        fetch_enabled: true,
        follow_links: FollowLinks::Allowlist,
        allowlist: Vec::new(),
        prefer_llms_full: false,
    };
    let paths = PathsConfig {
        root: PathBuf::from("/tmp"),
    };
    let source_config = Config { defaults, paths };

    // When: Round-tripping through pretty-printed TOML
    let toml_text = toml::to_string_pretty(&source_config)?;
    let restored: Config = toml::from_str(&toml_text)?;

    // Then: The list is still empty and the link policy is unchanged
    let restored_defaults = &restored.defaults;
    assert!(restored_defaults.allowlist.is_empty());
    assert!(matches!(
        restored_defaults.follow_links,
        FollowLinks::Allowlist
    ));

    Ok(())
}
1001
1002 // Property-based tests
1003 proptest! {
1004 #[test]
1005 fn test_config_refresh_hours_roundtrip(refresh_hours in 1u32..=365*24) {
1006 let config = Config {
1007 defaults: DefaultsConfig {
1008 refresh_hours,
1009 max_archives: 10,
1010 fetch_enabled: true,
1011 follow_links: FollowLinks::FirstParty,
1012 allowlist: vec![],
1013 prefer_llms_full: false,
1014 },
1015 paths: PathsConfig {
1016 root: PathBuf::from("/tmp"),
1017 },
1018 };
1019
1020 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1021 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1022
1023 prop_assert_eq!(deserialized.defaults.refresh_hours, refresh_hours);
1024 }
1025
1026 #[test]
1027 fn test_config_max_archives_roundtrip(max_archives in 1usize..=1000) {
1028 let config = Config {
1029 defaults: DefaultsConfig {
1030 refresh_hours: 24,
1031 max_archives,
1032 fetch_enabled: true,
1033 follow_links: FollowLinks::FirstParty,
1034 allowlist: vec![],
1035 prefer_llms_full: false,
1036 },
1037 paths: PathsConfig {
1038 root: PathBuf::from("/tmp"),
1039 },
1040 };
1041
1042 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1043 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1044
1045 prop_assert_eq!(deserialized.defaults.max_archives, max_archives);
1046 }
1047
1048 #[test]
1049 fn test_config_allowlist_roundtrip(allowlist in prop::collection::vec(r"[a-z0-9\.-]+", 0..=10)) {
1050 let config = Config {
1051 defaults: DefaultsConfig {
1052 refresh_hours: 24,
1053 max_archives: 10,
1054 fetch_enabled: true,
1055 follow_links: FollowLinks::Allowlist,
1056 allowlist: allowlist.clone(),
1057 prefer_llms_full: false,
1058 },
1059 paths: PathsConfig {
1060 root: PathBuf::from("/tmp"),
1061 },
1062 };
1063
1064 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1065 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1066
1067 prop_assert_eq!(deserialized.defaults.allowlist, allowlist);
1068 }
1069 }
1070
1071 /*
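// Security-focused tests
// NOTE(review): everything in this block comment is disabled. The `DefaultsConfig`
// literals below omit `prefer_llms_full`, which the active tests in this module set,
// so they would not compile as written; add that field before re-enabling. The
// block also ends with one extra closing brace that must be dropped when uncommenting.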
1073 #[test]
1074 fn test_config_path_traversal_prevention() {
1075 // Given: Config with potentially malicious paths
1076 let malicious_paths = vec![
1077 "../../../etc/passwd",
1078 "..\\..\\..\\windows\\system32",
1079 "/etc/shadow",
1080 "../../.ssh/id_rsa",
1081 ];
1082
1083 for malicious_path in malicious_paths {
1084 // When: Creating config with malicious path
1085 let config = Config {
1086 defaults: DefaultsConfig {
1087 refresh_hours: 24,
1088 max_archives: 10,
1089 fetch_enabled: true,
1090 follow_links: FollowLinks::FirstParty,
1091 allowlist: vec![],
1092 },
1093 paths: PathsConfig {
1094 root: PathBuf::from(malicious_path),
1095 },
1096 };
1097
1098 // Then: Should still serialize/deserialize (path validation is separate)
1099 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1100 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1101 assert_eq!(deserialized.paths.root, PathBuf::from(malicious_path));
1102 }
1103 }
1104
1105 #[test]
1106 fn test_config_malicious_toml_injection() {
1107 // Given: Potentially malicious TOML strings that could break parsing
1108 let malicious_strings = vec![
1109 "\n[malicious]\nkey = \"value\"",
1110 "\"quotes\"in\"weird\"places",
1111 "key = \"value\"\n[new_section]",
1112 "unicode = \"\\u0000\\u0001\\u0002\"",
1113 ];
1114
1115 for malicious_string in malicious_strings {
1116 // When: Setting allowlist with potentially malicious content
1117 let config = Config {
1118 defaults: DefaultsConfig {
1119 refresh_hours: 24,
1120 max_archives: 10,
1121 fetch_enabled: true,
1122 follow_links: FollowLinks::Allowlist,
1123 allowlist: vec![malicious_string.to_string()],
1124 },
1125 paths: PathsConfig {
1126 root: PathBuf::from("/tmp"),
1127 },
1128 };
1129
1130 // Then: Should serialize safely (TOML library handles escaping)
1131 let result = toml::to_string_pretty(&config);
1132 assert!(
1133 result.is_ok(),
1134 "Failed to serialize config with: {malicious_string}"
1135 );
1136
1137 if let Ok(serialized) = result {
1138 let deserialized_result: std::result::Result<Config, _> =
1139 toml::from_str(&serialized);
1140 assert!(
1141 deserialized_result.is_ok(),
1142 "Failed to deserialize config with: {malicious_string}"
1143 );
1144 }
1145 }
1146 }
1147
1148 #[test]
1149 fn test_config_unicode_handling() -> Result<()> {
1150 // Given: Configuration with Unicode content
1151 let unicode_config = Config {
1152 defaults: DefaultsConfig {
1153 refresh_hours: 24,
1154 max_archives: 10,
1155 fetch_enabled: true,
1156 follow_links: FollowLinks::Allowlist,
1157 allowlist: vec![
1158 "例え.com".to_string(), // Japanese
1159 "مثال.com".to_string(), // Arabic
1160 "пример.com".to_string(), // Cyrillic
1161 "🚀.test.com".to_string(), // Emoji
1162 ],
1163 },
1164 paths: PathsConfig {
1165 root: PathBuf::from("/tmp/测试"), // Chinese characters
1166 },
1167 };
1168
1169 // When: Serializing and deserializing
1170 let serialized = toml::to_string_pretty(&unicode_config)?;
1171 let deserialized: Config = toml::from_str(&serialized)?;
1172
1173 // Then: Unicode should be preserved correctly
1174 assert_eq!(deserialized.defaults.allowlist.len(), 4);
1175 assert!(
1176 deserialized
1177 .defaults
1178 .allowlist
1179 .contains(&"例え.com".to_string())
1180 );
1181 assert!(
1182 deserialized
1183 .defaults
1184 .allowlist
1185 .contains(&"🚀.test.com".to_string())
1186 );
1187 assert_eq!(deserialized.paths.root, PathBuf::from("/tmp/测试"));
1188
1189 Ok(())
1190 }
1191
1192 #[test]
1193 fn test_config_edge_case_empty_values() -> Result<()> {
1194 // Given: Configuration with empty values
1195 let empty_config = Config {
1196 defaults: DefaultsConfig {
1197 refresh_hours: 0, // Edge case: zero refresh
1198 max_archives: 0, // Edge case: no archives
1199 fetch_enabled: false,
1200 follow_links: FollowLinks::None,
1201 allowlist: vec![String::new()], // Empty string in allowlist
1202 },
1203 paths: PathsConfig {
1204 root: PathBuf::from(""), // Empty path
1205 },
1206 };
1207
1208 // When: Serializing and deserializing
1209 let serialized = toml::to_string_pretty(&empty_config)?;
1210 let deserialized: Config = toml::from_str(&serialized)?;
1211
1212 // Then: Empty/zero values should be handled correctly
1213 assert_eq!(deserialized.defaults.refresh_hours, 0);
1214 assert_eq!(deserialized.defaults.max_archives, 0);
1215 assert_eq!(deserialized.defaults.allowlist.len(), 1);
1216 assert_eq!(deserialized.defaults.allowlist[0], "");
1217 assert_eq!(deserialized.paths.root, PathBuf::from(""));
1218
1219 Ok(())
1220 }
1221 }
1222 */
1223}