blz_core/
config.rs

1//! Configuration management for blz cache system.
2//!
3//! This module provides hierarchical configuration with global defaults and per-source overrides.
4//! Configuration is stored in TOML format and supports environment variable overrides.
5//!
6//! ## Configuration Hierarchy
7//!
//! 1. **Global config**: `config.toml` in the resolved config directory (see [`Config`] docs)
//! 2. **Per-source config**: `<source_dir>/settings.toml`
//! 3. **Environment variables**: `BLZ_*` prefix (e.g. `BLZ_REFRESH_HOURS`, `BLZ_ROOT`)
11//!
12//! ## Examples
13//!
14//! ### Loading global configuration:
15//!
16//! ```rust
17//! use blz_core::{Config, Result};
18//!
19//! // Load from default location or create with defaults
20//! let config = Config::load()?;
21//! println!("Cache root: {}", config.paths.root.display());
22//! println!("Refresh interval: {} hours", config.defaults.refresh_hours);
23//! # Ok::<(), blz_core::Error>(())
24//! ```
25//!
26//! ### Working with tool-specific configuration:
27//!
28//! ```rust,no_run
29//! use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
30//! use std::path::Path;
31//!
32//! let tool_config = ToolConfig {
33//!     meta: ToolMeta {
34//!         name: "react".to_string(),
35//!         display_name: Some("React Documentation".to_string()),
36//!         homepage: Some("https://react.dev".to_string()),
37//!         repo: Some("https://github.com/facebook/react".to_string()),
38//!     },
39//!     fetch: FetchConfig {
40//!         refresh_hours: Some(12), // Override global default
41//!         follow_links: None,      // Use global default
42//!         allowlist: None,         // Use global default
43//!     },
44//!     index: IndexConfig {
45//!         max_heading_block_lines: Some(500),
46//!     },
47//! };
48//!
49//! // Save to file
50//! tool_config.save(Path::new("react/settings.toml"))?;
51//! # Ok::<(), blz_core::Error>(())
52//! ```
53
54use crate::{Error, Result};
55use serde::{Deserialize, Serialize};
56use std::fs;
57use std::path::{Path, PathBuf};
58
/// Global configuration for the blz cache system.
///
/// Contains default settings that apply to all sources unless overridden by per-source configuration.
/// [`Config::load`] reads the first config file it finds (see below) and falls back to
/// [`Config::default`] when no file exists.
///
/// ## File Location
///
/// The configuration file is searched for in this order:
/// 1. `$BLZ_CONFIG` (explicit path to a config file)
/// 2. `$BLZ_CONFIG_DIR/config.toml`
/// 3. XDG: `$XDG_CONFIG_HOME/blz/config.toml` or `~/.config/blz/config.toml`
/// 4. Dotfile fallback: `~/.blz/config.toml`
///
/// A `config.local.toml` in the same directory overrides keys from `config.toml`.
/// `BLZ_*` environment variables are applied last and take precedence over both files.
///
/// ## Example Configuration File
///
/// ```toml
/// [defaults]
/// refresh_hours = 24
/// max_archives = 10
/// fetch_enabled = true
/// follow_links = "first_party"
/// allowlist = ["docs.rs", "developer.mozilla.org"]
///
/// [paths]
/// root = "/home/user/.outfitter/blz"
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Default settings for all sources (the `[defaults]` table).
    pub defaults: DefaultsConfig,
    /// File system paths configuration (the `[paths]` table).
    pub paths: PathsConfig,
}
92
/// Default settings that apply to all sources unless overridden.
///
/// These settings control fetching behavior, caching policies, and link following rules.
/// Every field can be overridden at load time through a `BLZ_*` environment variable
/// (named on each field below).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DefaultsConfig {
    /// How often to refresh cached content (in hours).
    ///
    /// Sources are only re-fetched if they haven't been updated within this interval.
    /// Set to 0 to always fetch on access.
    ///
    /// Environment override: `BLZ_REFRESH_HOURS` (ignored unless it parses as `u32`).
    pub refresh_hours: u32,

    /// Maximum number of archived versions to keep per source.
    ///
    /// When a source is updated, the previous version is archived. This setting
    /// controls how many historical versions to retain for diff generation.
    ///
    /// Environment override: `BLZ_MAX_ARCHIVES` (ignored unless it parses as `usize`).
    pub max_archives: usize,

    /// Whether fetching from remote sources is enabled.
    ///
    /// When disabled, only locally cached content is used. Useful for offline work
    /// or environments with restricted network access.
    ///
    /// Environment override: `BLZ_FETCH_ENABLED`.
    pub fetch_enabled: bool,

    /// Policy for following links in llms.txt files.
    ///
    /// Controls whether and which external links should be followed when processing
    /// llms.txt files that contain references to other documentation sources.
    ///
    /// Environment override: `BLZ_FOLLOW_LINKS`.
    pub follow_links: FollowLinks,

    /// Domains allowed for link following.
    ///
    /// Only used when `follow_links` is set to `Allowlist`. Links to domains
    /// not in this list will be ignored.
    ///
    /// Environment override: `BLZ_ALLOWLIST` (comma-separated; ignored when it
    /// contains no non-empty entries).
    pub allowlist: Vec<String>,

    /// Prefer upgrading/using llms-full.txt when available.
    /// When true, update operations default to choosing llms-full.txt where available.
    ///
    /// Optional in the config file: `#[serde(default)]` makes it `false` when omitted.
    /// Environment override: `BLZ_PREFER_LLMS_FULL`.
    #[serde(default)]
    pub prefer_llms_full: bool,
}
133
/// Policy for following external links in llms.txt files.
///
/// This controls how the system handles links to other documentation sources
/// within llms.txt files.
///
/// Variants are (de)serialized in `snake_case`: `"none"`, `"first_party"`, `"allowlist"`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FollowLinks {
    /// Never follow external links.
    ///
    /// Only process the original llms.txt file, ignoring any links to other sources.
    None,

    /// Follow links to the same domain and its immediate subdomains.
    ///
    /// For example, if processing `docs.example.com/llms.txt`, links to
    /// `api.example.com/docs` or `example.com/guide` would be followed,
    /// but `other-site.com/docs` would be ignored.
    FirstParty,

    /// Only follow links to domains in the allowlist.
    ///
    /// Use the `allowlist` field in `DefaultsConfig` to specify which domains
    /// are permitted. This provides fine-grained control over which external
    /// sources are trusted.
    Allowlist,
}
160
/// File system paths configuration.
///
/// Defines where cached content, indices, and metadata are stored on the local filesystem.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PathsConfig {
    /// Root directory for all cached content.
    ///
    /// Each source gets its own subdirectory under this root. The directory
    /// structure is: `root/<source_alias>/`
    ///
    /// Default locations (the data directory reported by the `directories` crate
    /// for the project `dev` / `outfitter` / `blz`):
    /// - Linux: `$XDG_DATA_HOME/blz` or `~/.local/share/blz`
    /// - macOS: `~/Library/Application Support/dev.outfitter.blz`
    /// - Windows: `%APPDATA%\outfitter\blz\data`
    ///
    /// If no project directory can be determined the default falls back to
    /// `~/.outfitter/blz`. The `BLZ_ROOT` environment variable overrides this
    /// value at load time when it is non-empty.
    pub root: PathBuf,
}
177
178impl Config {
179    /// Load configuration from the default location or create with defaults.
180    ///
181    /// This method attempts to load the configuration from the system config directory.
182    /// If the file doesn't exist, it returns a configuration with sensible defaults.
183    /// If the file exists but is malformed, it returns an error.
184    ///
185    /// # Returns
186    ///
187    /// Returns the loaded configuration or a default configuration if no file exists.
188    ///
189    /// # Errors
190    ///
191    /// Returns an error if:
192    /// - The config directory cannot be determined (unsupported platform)
193    /// - The config file exists but cannot be read (permissions, I/O error)
194    /// - The config file exists but contains invalid TOML syntax
195    /// - The config file exists but contains invalid configuration values
196    ///
197    /// # Examples
198    ///
199    /// ```rust
200    /// use blz_core::Config;
201    ///
202    /// // Load existing config or create with defaults
203    /// let config = Config::load()?;
204    ///
205    /// if config.defaults.fetch_enabled {
206    ///     println!("Fetching is enabled");
207    /// }
208    /// # Ok::<(), blz_core::Error>(())
209    /// ```
210    pub fn load() -> Result<Self> {
211        // Determine base config path (BLZ_CONFIG/BLZ_CONFIG_DIR, XDG, dotfile), or use defaults
212        let base_path = Self::existing_config_path()?;
213
214        // Load base
215        let mut base_value: toml::Value = if let Some(ref path) = base_path {
216            let content = fs::read_to_string(path)
217                .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
218            toml::from_str(&content)
219                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))?
220        } else {
221            let default_str = toml::to_string(&Self::default())
222                .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?;
223            toml::from_str(&default_str)
224                .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?
225        };
226
227        // Merge optional local override next to resolved base directory
228        let base_dir = base_path.as_deref().map_or_else(
229            || {
230                Self::canonical_config_path().map_or_else(
231                    |_| PathBuf::new(),
232                    |p| p.parent().map(Path::to_path_buf).unwrap_or_default(),
233                )
234            },
235            |bp| bp.parent().map(Path::to_path_buf).unwrap_or_default(),
236        );
237
238        let local_path = base_dir.join("config.local.toml");
239        if local_path.exists() {
240            let content = fs::read_to_string(&local_path)
241                .map_err(|e| Error::Config(format!("Failed to read local config: {e}")))?;
242            let local_value: toml::Value = toml::from_str(&content)
243                .map_err(|e| Error::Config(format!("Failed to parse local config: {e}")))?;
244            Self::merge_toml(&mut base_value, &local_value);
245        }
246
247        // Deserialize
248        let mut config: Self = base_value
249            .try_into()
250            .map_err(|e| Error::Config(format!("Failed to materialize config: {e}")))?;
251
252        // Apply env overrides
253        config.apply_env_overrides();
254
255        Ok(config)
256    }
257
258    /// Save the configuration to the default location.
259    ///
260    /// This method serializes the configuration to TOML format and writes it to
261    /// the system config directory. Parent directories are created if they don't exist.
262    ///
263    /// # Errors
264    ///
265    /// Returns an error if:
266    /// - The config directory cannot be determined (unsupported platform)
267    /// - Parent directories cannot be created (permissions, disk space)
268    /// - The configuration cannot be serialized to TOML
269    /// - The file cannot be written (permissions, disk space, I/O error)
270    ///
271    /// # Examples
272    ///
273    /// ```rust,no_run
274    /// use blz_core::{Config, DefaultsConfig, PathsConfig, FollowLinks};
275    /// use std::path::PathBuf;
276    ///
277    /// let mut config = Config::load()?;
278    /// config.defaults.refresh_hours = 12; // Update refresh interval
279    /// config.save()?; // Persist changes
280    /// # Ok::<(), blz_core::Error>(())
281    /// ```
282    pub fn save(&self) -> Result<()> {
283        let config_path = Self::save_target_path()?;
284        let parent = config_path
285            .parent()
286            .ok_or_else(|| Error::Config("Invalid config path".into()))?;
287
288        fs::create_dir_all(parent)
289            .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
290
291        let content = toml::to_string_pretty(self)
292            .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
293
294        let tmp = parent.join("config.toml.tmp");
295        fs::write(&tmp, &content)
296            .map_err(|e| Error::Config(format!("Failed to write temp config: {e}")))?;
297        // Best-effort atomic replace; on Windows, rename() replaces if target does not exist.
298        // SAFETY: config.toml write is replaced in one step to avoid torn files.
299        #[cfg(target_os = "windows")]
300        if config_path.exists() {
301            fs::remove_file(&config_path)
302                .map_err(|e| Error::Config(format!("Failed to remove existing config: {e}")))?;
303        }
304        std::fs::rename(&tmp, &config_path)
305            .map_err(|e| Error::Config(format!("Failed to replace config: {e}")))?;
306
307        Ok(())
308    }
309
310    /// Get the path where the global configuration file is stored.
311    ///
312    /// Uses the system-appropriate config directory based on the platform:
313    /// - Linux: `~/.config/blz/global.toml`
314    /// - macOS: `~/Library/Application Support/dev.outfitter.blz/global.toml`
315    /// - Windows: `%APPDATA%\outfitter\blz\global.toml`
316    ///
317    /// # Errors
318    ///
319    /// Returns an error if the system config directory cannot be determined,
320    /// which may happen on unsupported platforms or in sandboxed environments.
321    fn canonical_config_path() -> Result<PathBuf> {
322        let xdg = std::env::var("XDG_CONFIG_HOME")
323            .ok()
324            .map(PathBuf::from)
325            .or_else(|| directories::BaseDirs::new().map(|b| b.home_dir().join(".config")))
326            .ok_or_else(|| Error::Config("Failed to determine XDG config directory".into()))?;
327        Ok(xdg.join("blz").join("config.toml"))
328    }
329
330    fn dotfile_config_path() -> Result<PathBuf> {
331        let home = directories::BaseDirs::new()
332            .map(|b| b.home_dir().to_path_buf())
333            .ok_or_else(|| Error::Config("Failed to determine home directory".into()))?;
334        Ok(home.join(".blz").join("config.toml"))
335    }
336
337    fn existing_config_path() -> Result<Option<PathBuf>> {
338        // 1) BLZ_CONFIG (file)
339        if let Ok(explicit) = std::env::var("BLZ_CONFIG") {
340            let explicit = explicit.trim();
341            if !explicit.is_empty() {
342                let p = PathBuf::from(explicit);
343                if p.is_file() && p.exists() {
344                    return Ok(Some(p));
345                }
346            }
347        }
348
349        // 2) BLZ_CONFIG_DIR (dir)
350        if let Ok(dir) = std::env::var("BLZ_CONFIG_DIR") {
351            let dir = dir.trim();
352            if !dir.is_empty() {
353                let p = PathBuf::from(dir).join("config.toml");
354                if p.is_file() && p.exists() {
355                    return Ok(Some(p));
356                }
357            }
358        }
359
360        // 3) XDG
361        let xdg = Self::canonical_config_path()?;
362        if xdg.exists() {
363            return Ok(Some(xdg));
364        }
365        // 4) Dotfile
366        let dot = Self::dotfile_config_path()?;
367        if dot.exists() {
368            return Ok(Some(dot));
369        }
370        Ok(None)
371    }
372
373    fn save_target_path() -> Result<PathBuf> {
374        if let Some(existing) = Self::existing_config_path()? {
375            return Ok(existing);
376        }
377        Self::canonical_config_path()
378    }
379
380    fn merge_toml(dst: &mut toml::Value, src: &toml::Value) {
381        use toml::Value::Table;
382        match (dst, src) {
383            (Table(dst_tbl), Table(src_tbl)) => {
384                for (k, v) in src_tbl {
385                    match dst_tbl.get_mut(k) {
386                        Some(dst_v) => Self::merge_toml(dst_v, v),
387                        None => {
388                            dst_tbl.insert(k.clone(), v.clone());
389                        },
390                    }
391                }
392            },
393            (dst_v, src_v) => *dst_v = src_v.clone(),
394        }
395    }
396
397    fn apply_env_overrides(&mut self) {
398        if let Ok(v) = std::env::var("BLZ_REFRESH_HOURS") {
399            if let Ok(n) = v.parse::<u32>() {
400                self.defaults.refresh_hours = n;
401            }
402        }
403        if let Ok(v) = std::env::var("BLZ_MAX_ARCHIVES") {
404            if let Ok(n) = v.parse::<usize>() {
405                self.defaults.max_archives = n;
406            }
407        }
408        if let Ok(v) = std::env::var("BLZ_FETCH_ENABLED") {
409            let norm = v.to_ascii_lowercase();
410            self.defaults.fetch_enabled = matches!(norm.as_str(), "1" | "true" | "yes" | "on");
411        }
412        if let Ok(v) = std::env::var("BLZ_FOLLOW_LINKS") {
413            match v.to_ascii_lowercase().as_str() {
414                "none" => self.defaults.follow_links = FollowLinks::None,
415                "first_party" | "firstparty" => {
416                    self.defaults.follow_links = FollowLinks::FirstParty;
417                },
418                "allowlist" => self.defaults.follow_links = FollowLinks::Allowlist,
419                _ => {},
420            }
421        }
422        if let Ok(v) = std::env::var("BLZ_ALLOWLIST") {
423            let list = v
424                .split(',')
425                .map(|s| s.trim().to_string())
426                .filter(|s| !s.is_empty())
427                .collect::<Vec<_>>();
428            if !list.is_empty() {
429                self.defaults.allowlist = list;
430            }
431        }
432        if let Ok(v) = std::env::var("BLZ_ROOT") {
433            let p = PathBuf::from(v);
434            if !p.as_os_str().is_empty() {
435                self.paths.root = p;
436            }
437        }
438        if let Ok(v) = std::env::var("BLZ_PREFER_LLMS_FULL") {
439            let s = v.trim().to_ascii_lowercase();
440            self.defaults.prefer_llms_full = matches!(s.as_str(), "1" | "true" | "yes" | "on");
441        }
442    }
443}
444
445impl Default for Config {
446    fn default() -> Self {
447        Self {
448            defaults: DefaultsConfig {
449                refresh_hours: 24,
450                max_archives: 10,
451                fetch_enabled: true,
452                follow_links: FollowLinks::FirstParty,
453                allowlist: Vec::new(),
454                prefer_llms_full: false,
455            },
456            paths: PathsConfig {
457                root: directories::ProjectDirs::from("dev", "outfitter", "blz").map_or_else(
458                    || {
459                        // Expand home directory properly
460                        directories::BaseDirs::new().map_or_else(
461                            || PathBuf::from(".outfitter/blz"),
462                            |base| base.home_dir().join(".outfitter").join("blz"),
463                        )
464                    },
465                    |dirs| dirs.data_dir().to_path_buf(),
466                ),
467            },
468        }
469    }
470}
471
/// Per-source configuration that overrides global defaults.
///
/// Each documentation source can have its own configuration file (`settings.toml`)
/// that overrides the global configuration for that specific source. This allows
/// fine-grained control over fetching behavior, indexing parameters, and metadata.
///
/// None of the three sections is marked `#[serde(default)]`, so `[meta]`, `[fetch]`
/// and `[index]` must all be present in the file (they may be empty apart from
/// `meta.name`).
///
/// ## File Location
///
/// Stored as `<cache_root>/<source_alias>/settings.toml`
///
/// ## Example Configuration File
///
/// ```toml
/// [meta]
/// name = "react"
/// display_name = "React Documentation"
/// homepage = "https://react.dev"
/// repo = "https://github.com/facebook/react"
///
/// [fetch]
/// refresh_hours = 12  # Override global default
/// follow_links = "first_party"
/// allowlist = ["reactjs.org", "react.dev"]
///
/// [index]
/// max_heading_block_lines = 500
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolConfig {
    /// Metadata about the documentation source (the `[meta]` table)
    pub meta: ToolMeta,
    /// Fetching behavior overrides (the `[fetch]` table)
    pub fetch: FetchConfig,
    /// Indexing parameter overrides (the `[index]` table)
    pub index: IndexConfig,
}
508
/// Metadata about a documentation source.
///
/// This information is used for display purposes and to provide context
/// about the source of documentation being cached. Only `name` is required;
/// the `Option` fields may be omitted from the TOML file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMeta {
    /// Unique identifier for this source (used as directory name).
    ///
    /// Should be a valid filename that uniquely identifies the source.
    /// Typically lowercase with hyphens (e.g., "react", "node-js", "rust-std").
    pub name: String,

    /// Human-readable display name for the source.
    ///
    /// Used in search results and UI displays. If not provided, the `name`
    /// field is used as fallback.
    pub display_name: Option<String>,

    /// Homepage URL for the documentation source.
    ///
    /// The main website or documentation portal for this source.
    /// Used for reference and linking back to the original documentation.
    pub homepage: Option<String>,

    /// Repository URL for the documentation source.
    ///
    /// Link to the source code repository, if available. Useful for
    /// understanding the project context and accessing source code.
    pub repo: Option<String>,
}
539
/// Per-source fetching behavior overrides.
///
/// These settings override the global defaults for fetching behavior.
/// Any `None` values will use the corresponding global default setting.
/// Each field mirrors the field of the same name in `DefaultsConfig`, and a key
/// omitted from the `[fetch]` table deserializes to `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FetchConfig {
    /// Override for refresh interval in hours.
    ///
    /// If `Some`, overrides the global `refresh_hours` setting for this source.
    /// If `None`, uses the global default.
    pub refresh_hours: Option<u32>,

    /// Override for link following policy.
    ///
    /// If `Some`, overrides the global `follow_links` setting for this source.
    /// If `None`, uses the global default.
    pub follow_links: Option<FollowLinks>,

    /// Override for allowed domains list.
    ///
    /// If `Some`, overrides the global `allowlist` setting for this source.
    /// If `None`, uses the global default. Only used when `follow_links` is `Allowlist`.
    pub allowlist: Option<Vec<String>>,
}
564
/// Per-source indexing parameter overrides.
///
/// These settings control how the documentation is processed and indexed
/// for this specific source, overriding global defaults where specified.
/// Stored in the `[index]` table of `settings.toml`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
    /// Maximum lines to include in a single heading block.
    ///
    /// Controls how large sections are broken up during indexing. Larger values
    /// include more context but may reduce search precision. Smaller values
    /// provide more focused results but may split related content.
    ///
    /// If `None`, uses a sensible default based on content analysis.
    /// The key may be omitted from the file, in which case it is `None`.
    pub max_heading_block_lines: Option<usize>,
}
580
581impl ToolConfig {
582    /// Load per-source configuration from a file.
583    ///
584    /// Loads and parses a TOML configuration file for a specific documentation source.
585    /// The file should contain sections for `[meta]`, `[fetch]`, and `[index]`.
586    ///
587    /// # Arguments
588    ///
589    /// * `path` - Path to the configuration file (typically `settings.toml`)
590    ///
591    /// # Returns
592    ///
593    /// Returns the parsed configuration.
594    ///
595    /// # Errors
596    ///
597    /// Returns an error if:
598    /// - The file cannot be read (doesn't exist, permissions, I/O error)
599    /// - The file contains invalid TOML syntax
600    /// - The file contains invalid configuration values
601    /// - Required fields are missing (e.g., `meta.name`)
602    ///
603    /// # Examples
604    ///
605    /// ```rust,no_run
606    /// use blz_core::ToolConfig;
607    /// use std::path::Path;
608    ///
609    /// // Load source-specific configuration
610    /// let config_path = Path::new("sources/react/settings.toml");
611    /// let tool_config = ToolConfig::load(config_path)?;
612    ///
613    /// println!("Source: {}", tool_config.meta.name);
614    /// if let Some(refresh) = tool_config.fetch.refresh_hours {
615    ///     println!("Custom refresh interval: {} hours", refresh);
616    /// }
617    /// # Ok::<(), blz_core::Error>(())
618    /// ```
619    pub fn load(path: &Path) -> Result<Self> {
620        let content = fs::read_to_string(path)
621            .map_err(|e| Error::Config(format!("Failed to read tool config: {e}")))?;
622        toml::from_str(&content)
623            .map_err(|e| Error::Config(format!("Failed to parse tool config: {e}")))
624    }
625
626    /// Save per-source configuration to a file.
627    ///
628    /// Serializes the configuration to TOML format and writes it to the specified path.
629    /// The parent directory must already exist.
630    ///
631    /// # Arguments
632    ///
633    /// * `path` - Path where to save the configuration file
634    ///
635    /// # Errors
636    ///
637    /// Returns an error if:
638    /// - The configuration cannot be serialized to TOML
639    /// - The parent directory doesn't exist
640    /// - The file cannot be written (permissions, disk space, I/O error)
641    ///
642    /// # Examples
643    ///
644    /// ```rust,no_run
645    /// use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
646    /// use std::path::Path;
647    ///
648    /// let config = ToolConfig {
649    ///     meta: ToolMeta {
650    ///         name: "my-docs".to_string(),
651    ///         display_name: Some("My Documentation".to_string()),
652    ///         homepage: None,
653    ///         repo: None,
654    ///     },
655    ///     fetch: FetchConfig {
656    ///         refresh_hours: Some(6),
657    ///         follow_links: None,
658    ///         allowlist: None,
659    ///     },
660    ///     index: IndexConfig {
661    ///         max_heading_block_lines: Some(300),
662    ///     },
663    /// };
664    ///
665    /// let config_path = Path::new("my-docs/settings.toml");
666    /// config.save(config_path)?;
667    /// # Ok::<(), blz_core::Error>(())
668    /// ```
669    pub fn save(&self, path: &Path) -> Result<()> {
670        let content = toml::to_string_pretty(self)
671            .map_err(|e| Error::Config(format!("Failed to serialize tool config: {e}")))?;
672        fs::write(path, content)
673            .map_err(|e| Error::Config(format!("Failed to write tool config: {e}")))?;
674        Ok(())
675    }
676}
677
678#[cfg(test)]
679#[allow(
680    clippy::panic,
681    clippy::disallowed_macros,
682    clippy::unwrap_used,
683    clippy::unnecessary_wraps
684)]
685mod tests {
686    use super::*;
687    use proptest::prelude::*;
688    use std::fs;
689    use tempfile::TempDir;
690
691    // Test fixtures
692    fn create_test_config() -> Config {
693        Config {
694            defaults: DefaultsConfig {
695                refresh_hours: 12,
696                max_archives: 5,
697                fetch_enabled: true,
698                follow_links: FollowLinks::Allowlist,
699                allowlist: vec!["example.com".to_string(), "docs.rs".to_string()],
700                prefer_llms_full: false,
701            },
702            paths: PathsConfig {
703                root: PathBuf::from("/tmp/test"),
704            },
705        }
706    }
707
708    fn create_test_tool_config() -> ToolConfig {
709        ToolConfig {
710            meta: ToolMeta {
711                name: "test-tool".to_string(),
712                display_name: Some("Test Tool".to_string()),
713                homepage: Some("https://test.com".to_string()),
714                repo: Some("https://github.com/test/tool".to_string()),
715            },
716            fetch: FetchConfig {
717                refresh_hours: Some(6),
718                follow_links: Some(FollowLinks::FirstParty),
719                allowlist: Some(vec!["allowed.com".to_string()]),
720            },
721            index: IndexConfig {
722                max_heading_block_lines: Some(100),
723            },
724        }
725    }
726
727    #[test]
728    fn test_default_config_values() {
729        // Given: Default configuration is requested
730        let config = Config::default();
731
732        // When: Examining default values
733        // Then: Should have sensible defaults
734        assert_eq!(config.defaults.refresh_hours, 24);
735        assert_eq!(config.defaults.max_archives, 10);
736        assert!(config.defaults.fetch_enabled);
737        assert!(matches!(
738            config.defaults.follow_links,
739            FollowLinks::FirstParty
740        ));
741        assert!(config.defaults.allowlist.is_empty());
742        assert!(!config.paths.root.as_os_str().is_empty());
743    }
744
745    #[test]
746    fn test_follow_links_serialization() -> Result<()> {
747        // Given: Different FollowLinks variants
748        let variants = vec![
749            FollowLinks::None,
750            FollowLinks::FirstParty,
751            FollowLinks::Allowlist,
752        ];
753
754        for variant in variants {
755            // When: Serializing and deserializing
756            let serialized = serde_json::to_string(&variant)?;
757            let deserialized: FollowLinks = serde_json::from_str(&serialized)?;
758
759            // Then: Should round-trip correctly
760            assert_eq!(variant, deserialized, "Round-trip failed for {variant:?}");
761        }
762        Ok(())
763    }
764
765    #[test]
766    fn test_config_save_and_load_roundtrip() -> Result<()> {
767        // Given: A temporary directory and test configuration
768        let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
769        let config_path = temp_dir.path().join("test_config.toml");
770        let original_config = create_test_config();
771
772        // When: Saving and then loading the configuration
773        let content = toml::to_string_pretty(&original_config)
774            .map_err(|e| Error::Config(format!("Failed to serialize: {e}")))?;
775        fs::write(&config_path, content)
776            .map_err(|e| Error::Config(format!("Failed to write: {e}")))?;
777
778        let loaded_config: Config = {
779            let content = fs::read_to_string(&config_path)
780                .map_err(|e| Error::Config(format!("Failed to read: {e}")))?;
781            toml::from_str(&content).map_err(|e| Error::Config(format!("Failed to parse: {e}")))?
782        };
783
784        // Then: Configurations should be identical
785        assert_eq!(
786            loaded_config.defaults.refresh_hours,
787            original_config.defaults.refresh_hours
788        );
789        assert_eq!(
790            loaded_config.defaults.max_archives,
791            original_config.defaults.max_archives
792        );
793        assert_eq!(
794            loaded_config.defaults.fetch_enabled,
795            original_config.defaults.fetch_enabled
796        );
797        assert_eq!(
798            loaded_config.defaults.allowlist,
799            original_config.defaults.allowlist
800        );
801        assert_eq!(loaded_config.paths.root, original_config.paths.root);
802
803        Ok(())
804    }
805
806    #[test]
807    fn test_config_load_missing_file() {
808        // Given: A non-existent config file path
809        let non_existent = PathBuf::from("/definitely/does/not/exist/config.toml");
810
811        // When: Attempting to load config
812        let result = (|| -> Result<Config> {
813            let content = fs::read_to_string(&non_existent)
814                .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
815            toml::from_str(&content)
816                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
817        })();
818
819        // Then: Should return appropriate error
820        assert!(result.is_err());
821        match result {
822            Err(Error::Config(msg)) => assert!(msg.contains("Failed to read config")),
823            _ => unreachable!("Expected Config error"),
824        }
825    }
826
827    #[test]
828    fn test_config_parse_invalid_toml() {
829        // Given: Invalid TOML content
830        let temp_dir = TempDir::new().expect("Failed to create temp dir");
831        let config_path = temp_dir.path().join("invalid.toml");
832        fs::write(&config_path, "this is not valid toml [[[").expect("Failed to write test file");
833
834        // When: Attempting to parse
835        let result = (|| -> Result<Config> {
836            let content = fs::read_to_string(&config_path)
837                .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
838            toml::from_str(&content)
839                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
840        })();
841
842        // Then: Should return parse error
843        assert!(result.is_err());
844        if let Err(Error::Config(msg)) = result {
845            assert!(msg.contains("Failed to parse config"));
846        } else {
847            panic!("Expected Config parse error");
848        }
849    }
850
851    #[test]
852    fn test_config_save_creates_directory() -> Result<()> {
853        // Given: A temporary directory and nested config path
854        let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
855        let nested_path = temp_dir
856            .path()
857            .join("nested")
858            .join("deeper")
859            .join("config.toml");
860        let config = create_test_config();
861
862        // When: Saving config to nested path (simulating Config::save logic)
863        let parent = nested_path
864            .parent()
865            .ok_or_else(|| Error::Config("Invalid config path".into()))?;
866        fs::create_dir_all(parent)
867            .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
868
869        let content = toml::to_string_pretty(&config)
870            .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
871        fs::write(&nested_path, content)
872            .map_err(|e| Error::Config(format!("Failed to write config: {e}")))?;
873
874        // Then: Directory should be created and file should exist
875        assert!(nested_path.exists());
876        assert!(
877            nested_path
878                .parent()
879                .expect("path should have parent")
880                .exists()
881        );
882
883        Ok(())
884    }
885
886    #[test]
887    fn test_tool_config_roundtrip() -> Result<()> {
888        // Given: A temporary file and test tool configuration
889        let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
890        let config_path = temp_dir.path().join("tool.toml");
891        let original_config = create_test_tool_config();
892
893        // When: Saving and loading the tool configuration
894        original_config.save(&config_path)?;
895        let loaded_config = ToolConfig::load(&config_path)?;
896
897        // Then: Configurations should be identical
898        assert_eq!(loaded_config.meta.name, original_config.meta.name);
899        assert_eq!(
900            loaded_config.meta.display_name,
901            original_config.meta.display_name
902        );
903        assert_eq!(loaded_config.meta.homepage, original_config.meta.homepage);
904        assert_eq!(loaded_config.meta.repo, original_config.meta.repo);
905        assert_eq!(
906            loaded_config.fetch.refresh_hours,
907            original_config.fetch.refresh_hours
908        );
909        assert_eq!(
910            loaded_config.fetch.allowlist,
911            original_config.fetch.allowlist
912        );
913        assert_eq!(
914            loaded_config.index.max_heading_block_lines,
915            original_config.index.max_heading_block_lines
916        );
917
918        Ok(())
919    }
920
921    #[test]
922    fn test_tool_config_load_nonexistent_file() {
923        // Given: A non-existent file path
924        let non_existent = PathBuf::from("/does/not/exist/tool.toml");
925
926        // When: Attempting to load
927        let result = ToolConfig::load(&non_existent);
928
929        // Then: Should return appropriate error
930        assert!(result.is_err());
931        if let Err(Error::Config(msg)) = result {
932            assert!(msg.contains("Failed to read tool config"));
933        } else {
934            panic!("Expected Config error");
935        }
936    }
937
938    #[test]
939    fn test_config_with_extreme_values() -> Result<()> {
940        // Given: Configuration with extreme but valid values (avoiding serialization limits)
941        let extreme_config = Config {
942            defaults: DefaultsConfig {
943                refresh_hours: 1_000_000, // Large but not MAX to avoid TOML issues
944                max_archives: 1_000_000,  // Large but not MAX to avoid TOML issues
945                fetch_enabled: false,
946                follow_links: FollowLinks::None,
947                allowlist: vec!["a".repeat(1000)], // Very long domain
948                prefer_llms_full: false,
949            },
950            paths: PathsConfig {
951                root: PathBuf::from("/".repeat(100)), // Very long path
952            },
953        };
954
955        // When: Serializing and deserializing
956        let serialized = toml::to_string_pretty(&extreme_config)
957            .map_err(|e| Error::Config(format!("Serialize failed: {e}")))?;
958        let deserialized: Config = toml::from_str(&serialized)
959            .map_err(|e| Error::Config(format!("Deserialize failed: {e}")))?;
960
961        // Then: Should handle extreme values correctly
962        assert_eq!(deserialized.defaults.refresh_hours, 1_000_000);
963        assert_eq!(deserialized.defaults.max_archives, 1_000_000);
964        assert!(!deserialized.defaults.fetch_enabled);
965        assert_eq!(deserialized.defaults.allowlist.len(), 1);
966        assert_eq!(deserialized.defaults.allowlist[0].len(), 1000);
967
968        Ok(())
969    }
970
971    #[test]
972    fn test_config_empty_allowlist() -> Result<()> {
973        // Given: Configuration with empty allowlist
974        let config = Config {
975            defaults: DefaultsConfig {
976                refresh_hours: 24,
977                max_archives: 10,
978                fetch_enabled: true,
979                follow_links: FollowLinks::Allowlist,
980                allowlist: vec![], // Empty allowlist
981                prefer_llms_full: false,
982            },
983            paths: PathsConfig {
984                root: PathBuf::from("/tmp"),
985            },
986        };
987
988        // When: Serializing and deserializing
989        let serialized = toml::to_string_pretty(&config)?;
990        let deserialized: Config = toml::from_str(&serialized)?;
991
992        // Then: Empty allowlist should be preserved
993        assert!(deserialized.defaults.allowlist.is_empty());
994        assert!(matches!(
995            deserialized.defaults.follow_links,
996            FollowLinks::Allowlist
997        ));
998
999        Ok(())
1000    }
1001
1002    // Property-based tests
1003    proptest! {
1004        #[test]
1005        fn test_config_refresh_hours_roundtrip(refresh_hours in 1u32..=365*24) {
1006            let config = Config {
1007                defaults: DefaultsConfig {
1008                    refresh_hours,
1009                    max_archives: 10,
1010                    fetch_enabled: true,
1011                    follow_links: FollowLinks::FirstParty,
1012                    allowlist: vec![],
1013                    prefer_llms_full: false,
1014                },
1015                paths: PathsConfig {
1016                    root: PathBuf::from("/tmp"),
1017                },
1018            };
1019
1020            let serialized = toml::to_string_pretty(&config).expect("should serialize");
1021            let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1022
1023            prop_assert_eq!(deserialized.defaults.refresh_hours, refresh_hours);
1024        }
1025
1026        #[test]
1027        fn test_config_max_archives_roundtrip(max_archives in 1usize..=1000) {
1028            let config = Config {
1029                defaults: DefaultsConfig {
1030                    refresh_hours: 24,
1031                    max_archives,
1032                    fetch_enabled: true,
1033                    follow_links: FollowLinks::FirstParty,
1034                    allowlist: vec![],
1035                    prefer_llms_full: false,
1036                },
1037                paths: PathsConfig {
1038                    root: PathBuf::from("/tmp"),
1039                },
1040            };
1041
1042            let serialized = toml::to_string_pretty(&config).expect("should serialize");
1043            let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1044
1045            prop_assert_eq!(deserialized.defaults.max_archives, max_archives);
1046        }
1047
1048        #[test]
1049        fn test_config_allowlist_roundtrip(allowlist in prop::collection::vec(r"[a-z0-9\.-]+", 0..=10)) {
1050            let config = Config {
1051                defaults: DefaultsConfig {
1052                    refresh_hours: 24,
1053                    max_archives: 10,
1054                    fetch_enabled: true,
1055                    follow_links: FollowLinks::Allowlist,
1056                    allowlist: allowlist.clone(),
1057                    prefer_llms_full: false,
1058                },
1059                paths: PathsConfig {
1060                    root: PathBuf::from("/tmp"),
1061                },
1062            };
1063
1064            let serialized = toml::to_string_pretty(&config).expect("should serialize");
1065            let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1066
1067            prop_assert_eq!(deserialized.defaults.allowlist, allowlist);
1068        }
1069    }
1070
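    /*
    // NOTE(review): the tests below are disabled. As written they would not compile:
    // their `DefaultsConfig` literals omit the `prefer_llms_full` field that the
    // active tests in this module set, and the block ends with an unmatched
    // closing brace. Add the missing field and drop the extra brace before
    // re-enabling them.
    // Security-focused tests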
1073    #[test]
1074    fn test_config_path_traversal_prevention() {
1075            // Given: Config with potentially malicious paths
1076            let malicious_paths = vec![
1077                "../../../etc/passwd",
1078                "..\\..\\..\\windows\\system32",
1079                "/etc/shadow",
1080                "../../.ssh/id_rsa",
1081            ];
1082
1083            for malicious_path in malicious_paths {
1084                // When: Creating config with malicious path
1085                let config = Config {
1086                    defaults: DefaultsConfig {
1087                        refresh_hours: 24,
1088                        max_archives: 10,
1089                        fetch_enabled: true,
1090                        follow_links: FollowLinks::FirstParty,
1091                        allowlist: vec![],
1092                    },
1093                    paths: PathsConfig {
1094                        root: PathBuf::from(malicious_path),
1095                    },
1096                };
1097
1098                // Then: Should still serialize/deserialize (path validation is separate)
1099                let serialized = toml::to_string_pretty(&config).expect("should serialize");
1100                let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1101                assert_eq!(deserialized.paths.root, PathBuf::from(malicious_path));
1102            }
1103        }
1104
1105        #[test]
1106        fn test_config_malicious_toml_injection() {
1107            // Given: Potentially malicious TOML strings that could break parsing
1108            let malicious_strings = vec![
1109                "\n[malicious]\nkey = \"value\"",
1110                "\"quotes\"in\"weird\"places",
1111                "key = \"value\"\n[new_section]",
1112                "unicode = \"\\u0000\\u0001\\u0002\"",
1113            ];
1114
1115            for malicious_string in malicious_strings {
1116                // When: Setting allowlist with potentially malicious content
1117                let config = Config {
1118                    defaults: DefaultsConfig {
1119                        refresh_hours: 24,
1120                        max_archives: 10,
1121                        fetch_enabled: true,
1122                        follow_links: FollowLinks::Allowlist,
1123                        allowlist: vec![malicious_string.to_string()],
1124                    },
1125                    paths: PathsConfig {
1126                        root: PathBuf::from("/tmp"),
1127                    },
1128                };
1129
1130                // Then: Should serialize safely (TOML library handles escaping)
1131                let result = toml::to_string_pretty(&config);
1132                assert!(
1133                    result.is_ok(),
1134                    "Failed to serialize config with: {malicious_string}"
1135                );
1136
1137                if let Ok(serialized) = result {
1138                    let deserialized_result: std::result::Result<Config, _> =
1139                        toml::from_str(&serialized);
1140                    assert!(
1141                        deserialized_result.is_ok(),
1142                        "Failed to deserialize config with: {malicious_string}"
1143                    );
1144                }
1145            }
1146        }
1147
1148        #[test]
1149        fn test_config_unicode_handling() -> Result<()> {
1150            // Given: Configuration with Unicode content
1151            let unicode_config = Config {
1152                defaults: DefaultsConfig {
1153                    refresh_hours: 24,
1154                    max_archives: 10,
1155                    fetch_enabled: true,
1156                    follow_links: FollowLinks::Allowlist,
1157                    allowlist: vec![
1158                        "例え.com".to_string(),    // Japanese
1159                        "مثال.com".to_string(),    // Arabic
1160                        "пример.com".to_string(),  // Cyrillic
1161                        "🚀.test.com".to_string(), // Emoji
1162                    ],
1163                },
1164                paths: PathsConfig {
1165                    root: PathBuf::from("/tmp/测试"), // Chinese characters
1166                },
1167            };
1168
1169            // When: Serializing and deserializing
1170            let serialized = toml::to_string_pretty(&unicode_config)?;
1171            let deserialized: Config = toml::from_str(&serialized)?;
1172
1173            // Then: Unicode should be preserved correctly
1174            assert_eq!(deserialized.defaults.allowlist.len(), 4);
1175            assert!(
1176                deserialized
1177                    .defaults
1178                    .allowlist
1179                    .contains(&"例え.com".to_string())
1180            );
1181            assert!(
1182                deserialized
1183                    .defaults
1184                    .allowlist
1185                    .contains(&"🚀.test.com".to_string())
1186            );
1187            assert_eq!(deserialized.paths.root, PathBuf::from("/tmp/测试"));
1188
1189            Ok(())
1190        }
1191
1192        #[test]
1193        fn test_config_edge_case_empty_values() -> Result<()> {
1194            // Given: Configuration with empty values
1195            let empty_config = Config {
1196                defaults: DefaultsConfig {
1197                    refresh_hours: 0, // Edge case: zero refresh
1198                    max_archives: 0,  // Edge case: no archives
1199                    fetch_enabled: false,
1200                    follow_links: FollowLinks::None,
1201                    allowlist: vec![String::new()], // Empty string in allowlist
1202                },
1203                paths: PathsConfig {
1204                    root: PathBuf::from(""), // Empty path
1205                },
1206            };
1207
1208            // When: Serializing and deserializing
1209            let serialized = toml::to_string_pretty(&empty_config)?;
1210            let deserialized: Config = toml::from_str(&serialized)?;
1211
1212            // Then: Empty/zero values should be handled correctly
1213            assert_eq!(deserialized.defaults.refresh_hours, 0);
1214            assert_eq!(deserialized.defaults.max_archives, 0);
1215            assert_eq!(deserialized.defaults.allowlist.len(), 1);
1216            assert_eq!(deserialized.defaults.allowlist[0], "");
1217            assert_eq!(deserialized.paths.root, PathBuf::from(""));
1218
1219            Ok(())
1220        }
1221    }
1222    */
1223}