blz_core/
config.rs

1//! Configuration management for blz cache system.
2//!
3//! This module provides hierarchical configuration with global defaults and per-source overrides.
4//! Configuration is stored in TOML format and supports environment variable overrides.
5//!
6//! ## Configuration Hierarchy
7//!
//! 1. **Global config**: `config.toml` in the blz config directory (see [`Config`] docs)
9//! 2. **Per-source config**: `<source_dir>/settings.toml`
//! 3. **Environment variables**: `BLZ_*` prefix (e.g. `BLZ_REFRESH_HOURS`, `BLZ_ROOT`)
11//!
12//! ## Examples
13//!
14//! ### Loading global configuration:
15//!
16//! ```rust
17//! use blz_core::{Config, Result};
18//!
19//! // Load from default location or create with defaults
20//! let config = Config::load()?;
21//! println!("Cache root: {}", config.paths.root.display());
22//! println!("Refresh interval: {} hours", config.defaults.refresh_hours);
23//! # Ok::<(), blz_core::Error>(())
24//! ```
25//!
26//! ### Working with tool-specific configuration:
27//!
28//! ```rust,no_run
29//! use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
30//! use std::path::Path;
31//!
32//! let tool_config = ToolConfig {
33//!     meta: ToolMeta {
34//!         name: "react".to_string(),
35//!         display_name: Some("React Documentation".to_string()),
36//!         homepage: Some("https://react.dev".to_string()),
37//!         repo: Some("https://github.com/facebook/react".to_string()),
38//!     },
39//!     fetch: FetchConfig {
40//!         refresh_hours: Some(12), // Override global default
41//!         follow_links: None,      // Use global default
42//!         allowlist: None,         // Use global default
43//!     },
44//!     index: IndexConfig {
45//!         max_heading_block_lines: Some(500),
46//!     },
47//! };
48//!
49//! // Save to file
50//! tool_config.save(Path::new("react/settings.toml"))?;
51//! # Ok::<(), blz_core::Error>(())
52//! ```
53
54use crate::{Error, Result, profile};
55use serde::{Deserialize, Serialize};
56use std::fs;
57use std::path::{Path, PathBuf};
58
/// Global configuration for the blz cache system.
///
/// Contains default settings that apply to all sources unless overridden by per-source configuration.
/// Configuration is loaded by [`Config::load`] from the first config file found (see below);
/// when no file exists, the values from [`Config::default`] are used.
///
/// ## File Location
///
/// The configuration file is searched for in this order:
/// 1. `$BLZ_CONFIG` (explicit path to a config file)
/// 2. `$BLZ_CONFIG_DIR/config.toml`
/// 3. XDG: `$XDG_CONFIG_HOME/blz/config.toml` or `~/.config/blz/config.toml`
/// 4. Dotfile fallback: `~/.blz/config.toml`
///
/// A `config.local.toml` in the same directory overrides keys from `config.toml`.
/// `BLZ_*` environment variables (for example `BLZ_REFRESH_HOURS` or `BLZ_ROOT`) are
/// applied last and win over both files.
///
/// ## Example Configuration File
///
/// ```toml
/// [defaults]
/// refresh_hours = 24
/// max_archives = 10
/// fetch_enabled = true
/// follow_links = "first_party"
/// allowlist = ["docs.rs", "developer.mozilla.org"]
///
/// [paths]
/// root = "/home/user/.outfitter/blz"
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Default settings for all sources (the `[defaults]` table).
    pub defaults: DefaultsConfig,
    /// File system paths configuration (the `[paths]` table).
    pub paths: PathsConfig,
}
92
/// Default settings that apply to all sources unless overridden.
///
/// These settings control fetching behavior, caching policies, and link following rules.
/// Each field can be overridden at load time by the environment variable named in its docs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DefaultsConfig {
    /// How often to refresh cached content (in hours).
    ///
    /// Sources are only re-fetched if they haven't been updated within this interval.
    /// Set to 0 to always fetch on access.
    ///
    /// Built-in default: 24. Environment override: `BLZ_REFRESH_HOURS`.
    pub refresh_hours: u32,

    /// Maximum number of archived versions to keep per source.
    ///
    /// When a source is updated, the previous version is archived. This setting
    /// controls how many historical versions to retain for diff generation.
    ///
    /// Built-in default: 10. Environment override: `BLZ_MAX_ARCHIVES`.
    pub max_archives: usize,

    /// Whether fetching from remote sources is enabled.
    ///
    /// When disabled, only locally cached content is used. Useful for offline work
    /// or environments with restricted network access.
    ///
    /// Built-in default: `true`. Environment override: `BLZ_FETCH_ENABLED`.
    pub fetch_enabled: bool,

    /// Policy for following links in llms.txt files.
    ///
    /// Controls whether and which external links should be followed when processing
    /// llms.txt files that contain references to other documentation sources.
    ///
    /// Built-in default: [`FollowLinks::FirstParty`]. Environment override: `BLZ_FOLLOW_LINKS`.
    pub follow_links: FollowLinks,

    /// Domains allowed for link following.
    ///
    /// Only used when `follow_links` is set to `Allowlist`. Links to domains
    /// not in this list will be ignored.
    ///
    /// Built-in default: empty. Environment override: `BLZ_ALLOWLIST` (comma-separated).
    pub allowlist: Vec<String>,
}
128
/// Policy for following external links in llms.txt files.
///
/// This controls how the system handles links to other documentation sources
/// within llms.txt files.
///
/// Variants are (de)serialized in `snake_case`, i.e. `"none"`, `"first_party"`
/// and `"allowlist"` in TOML.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FollowLinks {
    /// Never follow external links (serialized as `"none"`).
    ///
    /// Only process the original llms.txt file, ignoring any links to other sources.
    None,

    /// Follow links to the same domain and its immediate subdomains
    /// (serialized as `"first_party"`).
    ///
    /// For example, if processing `docs.example.com/llms.txt`, links to
    /// `api.example.com/docs` or `example.com/guide` would be followed,
    /// but `other-site.com/docs` would be ignored.
    FirstParty,

    /// Only follow links to domains in the allowlist (serialized as `"allowlist"`).
    ///
    /// Use the `allowlist` field in `DefaultsConfig` to specify which domains
    /// are permitted. This provides fine-grained control over which external
    /// sources are trusted.
    Allowlist,
}
155
/// File system paths configuration.
///
/// Defines where cached content, indices, and metadata are stored on the local filesystem.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PathsConfig {
    /// Root directory for all cached content.
    ///
    /// Each source gets its own subdirectory under this root. The directory
    /// structure is: `root/<source_alias>/`
    ///
    /// The built-in default is the platform data directory reported by the
    /// `directories` crate for this application, for example:
    /// - Linux: `~/.local/share/blz`
    /// - macOS: `~/Library/Application Support/dev.outfitter.blz`
    /// - Windows: `%APPDATA%\outfitter\blz\data`
    ///
    /// If no platform directory can be determined, `~/.outfitter/<app>` is used
    /// (or the relative path `.outfitter/<app>` when the home directory is unknown).
    /// Can be overridden with the `BLZ_ROOT` environment variable.
    pub root: PathBuf,
}
172
173impl Config {
174    /// Load configuration from the default location or create with defaults.
175    ///
176    /// This method attempts to load the configuration from the system config directory.
177    /// If the file doesn't exist, it returns a configuration with sensible defaults.
178    /// If the file exists but is malformed, it returns an error.
179    ///
180    /// # Returns
181    ///
182    /// Returns the loaded configuration or a default configuration if no file exists.
183    ///
184    /// # Errors
185    ///
186    /// Returns an error if:
187    /// - The config directory cannot be determined (unsupported platform)
188    /// - The config file exists but cannot be read (permissions, I/O error)
189    /// - The config file exists but contains invalid TOML syntax
190    /// - The config file exists but contains invalid configuration values
191    ///
192    /// # Examples
193    ///
194    /// ```rust
195    /// use blz_core::Config;
196    ///
197    /// // Load existing config or create with defaults
198    /// let config = Config::load()?;
199    ///
200    /// if config.defaults.fetch_enabled {
201    ///     println!("Fetching is enabled");
202    /// }
203    /// # Ok::<(), blz_core::Error>(())
204    /// ```
205    pub fn load() -> Result<Self> {
206        // Determine base config path (BLZ_CONFIG/BLZ_CONFIG_DIR, XDG, dotfile), or use defaults
207        let base_path = Self::existing_config_path()?;
208
209        // Load base
210        let mut base_value: toml::Value = if let Some(ref path) = base_path {
211            let content = fs::read_to_string(path)
212                .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
213            toml::from_str(&content)
214                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))?
215        } else {
216            let default_str = toml::to_string(&Self::default())
217                .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?;
218            toml::from_str(&default_str)
219                .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?
220        };
221
222        // Merge optional local override next to resolved base directory
223        let base_dir = base_path.as_deref().map_or_else(
224            || {
225                Self::canonical_config_path().map_or_else(
226                    |_| PathBuf::new(),
227                    |p| p.parent().map(Path::to_path_buf).unwrap_or_default(),
228                )
229            },
230            |bp| bp.parent().map(Path::to_path_buf).unwrap_or_default(),
231        );
232
233        let local_path = base_dir.join("config.local.toml");
234        if local_path.exists() {
235            let content = fs::read_to_string(&local_path)
236                .map_err(|e| Error::Config(format!("Failed to read local config: {e}")))?;
237            let local_value: toml::Value = toml::from_str(&content)
238                .map_err(|e| Error::Config(format!("Failed to parse local config: {e}")))?;
239            Self::merge_toml(&mut base_value, &local_value);
240        }
241
242        // Deserialize
243        let mut config: Self = base_value
244            .try_into()
245            .map_err(|e| Error::Config(format!("Failed to materialize config: {e}")))?;
246
247        // Apply env overrides
248        config.apply_env_overrides();
249
250        Ok(config)
251    }
252
253    /// Save the configuration to the default location.
254    ///
255    /// This method serializes the configuration to TOML format and writes it to
256    /// the system config directory. Parent directories are created if they don't exist.
257    ///
258    /// # Errors
259    ///
260    /// Returns an error if:
261    /// - The config directory cannot be determined (unsupported platform)
262    /// - Parent directories cannot be created (permissions, disk space)
263    /// - The configuration cannot be serialized to TOML
264    /// - The file cannot be written (permissions, disk space, I/O error)
265    ///
266    /// # Examples
267    ///
268    /// ```rust,no_run
269    /// use blz_core::{Config, DefaultsConfig, PathsConfig, FollowLinks};
270    /// use std::path::PathBuf;
271    ///
272    /// let mut config = Config::load()?;
273    /// config.defaults.refresh_hours = 12; // Update refresh interval
274    /// config.save()?; // Persist changes
275    /// # Ok::<(), blz_core::Error>(())
276    /// ```
277    pub fn save(&self) -> Result<()> {
278        let config_path = Self::save_target_path()?;
279        let parent = config_path
280            .parent()
281            .ok_or_else(|| Error::Config("Invalid config path".into()))?;
282
283        fs::create_dir_all(parent)
284            .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
285
286        let content = toml::to_string_pretty(self)
287            .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
288
289        let tmp = parent.join("config.toml.tmp");
290        fs::write(&tmp, &content)
291            .map_err(|e| Error::Config(format!("Failed to write temp config: {e}")))?;
292        // Best-effort atomic replace; on Windows, rename() replaces if target does not exist.
293        // SAFETY: config.toml write is replaced in one step to avoid torn files.
294        #[cfg(target_os = "windows")]
295        if config_path.exists() {
296            fs::remove_file(&config_path)
297                .map_err(|e| Error::Config(format!("Failed to remove existing config: {e}")))?;
298        }
299        std::fs::rename(&tmp, &config_path)
300            .map_err(|e| Error::Config(format!("Failed to replace config: {e}")))?;
301
302        Ok(())
303    }
304
305    /// Get the path where the global configuration file is stored.
306    ///
307    /// Uses the system-appropriate config directory based on the platform:
308    /// - Linux: `~/.config/blz/global.toml`
309    /// - macOS: `~/Library/Application Support/dev.outfitter.blz/global.toml`
310    /// - Windows: `%APPDATA%\outfitter\blz\global.toml`
311    ///
312    /// # Errors
313    ///
314    /// Returns an error if the system config directory cannot be determined,
315    /// which may happen on unsupported platforms or in sandboxed environments.
316    fn canonical_config_path() -> Result<PathBuf> {
317        let xdg = std::env::var("XDG_CONFIG_HOME")
318            .ok()
319            .map(PathBuf::from)
320            .or_else(|| directories::BaseDirs::new().map(|b| b.home_dir().join(".config")))
321            .ok_or_else(|| Error::Config("Failed to determine XDG config directory".into()))?;
322        Ok(xdg.join(profile::app_dir_slug()).join("config.toml"))
323    }
324
325    fn dotfile_config_path() -> Result<PathBuf> {
326        let home = directories::BaseDirs::new()
327            .map(|b| b.home_dir().to_path_buf())
328            .ok_or_else(|| Error::Config("Failed to determine home directory".into()))?;
329        Ok(home.join(profile::dot_dir_slug()).join("config.toml"))
330    }
331
332    fn existing_config_path() -> Result<Option<PathBuf>> {
333        // 1) BLZ_CONFIG (file)
334        if let Ok(explicit) = std::env::var("BLZ_CONFIG") {
335            let explicit = explicit.trim();
336            if !explicit.is_empty() {
337                let p = PathBuf::from(explicit);
338                if p.is_file() && p.exists() {
339                    return Ok(Some(p));
340                }
341            }
342        }
343
344        // 2) BLZ_CONFIG_DIR (dir)
345        if let Ok(dir) = std::env::var("BLZ_CONFIG_DIR") {
346            let dir = dir.trim();
347            if !dir.is_empty() {
348                let p = PathBuf::from(dir).join("config.toml");
349                if p.is_file() && p.exists() {
350                    return Ok(Some(p));
351                }
352            }
353        }
354
355        // 3) XDG
356        let xdg = Self::canonical_config_path()?;
357        if xdg.exists() {
358            return Ok(Some(xdg));
359        }
360        // 4) Dotfile
361        let dot = Self::dotfile_config_path()?;
362        if dot.exists() {
363            return Ok(Some(dot));
364        }
365        Ok(None)
366    }
367
368    fn save_target_path() -> Result<PathBuf> {
369        if let Some(existing) = Self::existing_config_path()? {
370            return Ok(existing);
371        }
372        Self::canonical_config_path()
373    }
374
375    fn merge_toml(dst: &mut toml::Value, src: &toml::Value) {
376        use toml::Value::Table;
377        match (dst, src) {
378            (Table(dst_tbl), Table(src_tbl)) => {
379                for (k, v) in src_tbl {
380                    match dst_tbl.get_mut(k) {
381                        Some(dst_v) => Self::merge_toml(dst_v, v),
382                        None => {
383                            dst_tbl.insert(k.clone(), v.clone());
384                        },
385                    }
386                }
387            },
388            (dst_v, src_v) => *dst_v = src_v.clone(),
389        }
390    }
391
392    fn apply_env_overrides(&mut self) {
393        if let Ok(v) = std::env::var("BLZ_REFRESH_HOURS") {
394            if let Ok(n) = v.parse::<u32>() {
395                self.defaults.refresh_hours = n;
396            }
397        }
398        if let Ok(v) = std::env::var("BLZ_MAX_ARCHIVES") {
399            if let Ok(n) = v.parse::<usize>() {
400                self.defaults.max_archives = n;
401            }
402        }
403        if let Ok(v) = std::env::var("BLZ_FETCH_ENABLED") {
404            let norm = v.to_ascii_lowercase();
405            self.defaults.fetch_enabled = matches!(norm.as_str(), "1" | "true" | "yes" | "on");
406        }
407        if let Ok(v) = std::env::var("BLZ_FOLLOW_LINKS") {
408            match v.to_ascii_lowercase().as_str() {
409                "none" => self.defaults.follow_links = FollowLinks::None,
410                "first_party" | "firstparty" => {
411                    self.defaults.follow_links = FollowLinks::FirstParty;
412                },
413                "allowlist" => self.defaults.follow_links = FollowLinks::Allowlist,
414                _ => {},
415            }
416        }
417        if let Ok(v) = std::env::var("BLZ_ALLOWLIST") {
418            let list = v
419                .split(',')
420                .map(|s| s.trim().to_string())
421                .filter(|s| !s.is_empty())
422                .collect::<Vec<_>>();
423            if !list.is_empty() {
424                self.defaults.allowlist = list;
425            }
426        }
427        if let Ok(v) = std::env::var("BLZ_ROOT") {
428            let p = PathBuf::from(v);
429            if !p.as_os_str().is_empty() {
430                self.paths.root = p;
431            }
432        }
433    }
434}
435
436impl Default for Config {
437    fn default() -> Self {
438        Self {
439            defaults: DefaultsConfig {
440                refresh_hours: 24,
441                max_archives: 10,
442                fetch_enabled: true,
443                follow_links: FollowLinks::FirstParty,
444                allowlist: Vec::new(),
445            },
446            paths: PathsConfig {
447                root: directories::ProjectDirs::from("dev", "outfitter", profile::app_dir_slug())
448                    .map_or_else(
449                        || {
450                            // Expand home directory properly
451                            directories::BaseDirs::new().map_or_else(
452                                || PathBuf::from(".outfitter").join(profile::app_dir_slug()),
453                                |base| {
454                                    base.home_dir()
455                                        .join(".outfitter")
456                                        .join(profile::app_dir_slug())
457                                },
458                            )
459                        },
460                        |dirs| dirs.data_dir().to_path_buf(),
461                    ),
462            },
463        }
464    }
465}
466
/// Per-source configuration that overrides global defaults.
///
/// Each documentation source can have its own configuration file (`settings.toml`)
/// that overrides the global configuration for that specific source. This allows
/// fine-grained control over fetching behavior, indexing parameters, and metadata.
///
/// Use [`ToolConfig::load`] and [`ToolConfig::save`] to read and write the file.
///
/// ## File Location
///
/// Stored as `<cache_root>/<source_alias>/settings.toml`
///
/// ## Example Configuration File
///
/// ```toml
/// [meta]
/// name = "react"
/// display_name = "React Documentation"
/// homepage = "https://react.dev"
/// repo = "https://github.com/facebook/react"
///
/// [fetch]
/// refresh_hours = 12  # Override global default
/// follow_links = "first_party"
/// allowlist = ["reactjs.org", "react.dev"]
///
/// [index]
/// max_heading_block_lines = 500
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolConfig {
    /// Metadata about the documentation source (the `[meta]` table).
    pub meta: ToolMeta,
    /// Fetching behavior overrides (the `[fetch]` table).
    pub fetch: FetchConfig,
    /// Indexing parameter overrides (the `[index]` table).
    pub index: IndexConfig,
}
503
/// Metadata about a documentation source.
///
/// This information is used for display purposes and to provide context
/// about the source of documentation being cached. Only `name` is required;
/// the remaining fields are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMeta {
    /// Unique identifier for this source (used as directory name).
    ///
    /// Should be a valid filename that uniquely identifies the source.
    /// Typically lowercase with hyphens (e.g., "react", "node-js", "rust-std").
    pub name: String,

    /// Human-readable display name for the source.
    ///
    /// Used in search results and UI displays. If not provided, the `name`
    /// field is used as fallback.
    pub display_name: Option<String>,

    /// Homepage URL for the documentation source.
    ///
    /// The main website or documentation portal for this source.
    /// Used for reference and linking back to the original documentation.
    pub homepage: Option<String>,

    /// Repository URL for the documentation source.
    ///
    /// Link to the source code repository, if available. Useful for
    /// understanding the project context and accessing source code.
    pub repo: Option<String>,
}
534
/// Per-source fetching behavior overrides.
///
/// These settings override the global defaults for fetching behavior.
/// Any `None` values will use the corresponding global default setting
/// from [`DefaultsConfig`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FetchConfig {
    /// Override for refresh interval in hours.
    ///
    /// If `Some`, overrides the global `refresh_hours` setting for this source.
    /// If `None`, uses the global default.
    pub refresh_hours: Option<u32>,

    /// Override for link following policy.
    ///
    /// If `Some`, overrides the global `follow_links` setting for this source.
    /// If `None`, uses the global default.
    pub follow_links: Option<FollowLinks>,

    /// Override for allowed domains list.
    ///
    /// If `Some`, overrides the global `allowlist` setting for this source.
    /// If `None`, uses the global default. Only used when `follow_links` is `Allowlist`.
    pub allowlist: Option<Vec<String>>,
}
559
/// Per-source indexing parameter overrides.
///
/// These settings control how the documentation is processed and indexed
/// for this specific source, overriding global defaults where specified.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
    /// Maximum lines to include in a single heading block.
    ///
    /// Controls how large sections are broken up during indexing. Larger values
    /// include more context but may reduce search precision. Smaller values
    /// provide more focused results but may split related content.
    ///
    /// If `None`, the indexer falls back to its own default.
    // NOTE(review): the fallback value is chosen outside this module — confirm
    // against the indexer before documenting a concrete number here.
    pub max_heading_block_lines: Option<usize>,
}
575
576impl ToolConfig {
577    /// Load per-source configuration from a file.
578    ///
579    /// Loads and parses a TOML configuration file for a specific documentation source.
580    /// The file should contain sections for `[meta]`, `[fetch]`, and `[index]`.
581    ///
582    /// # Arguments
583    ///
584    /// * `path` - Path to the configuration file (typically `settings.toml`)
585    ///
586    /// # Returns
587    ///
588    /// Returns the parsed configuration.
589    ///
590    /// # Errors
591    ///
592    /// Returns an error if:
593    /// - The file cannot be read (doesn't exist, permissions, I/O error)
594    /// - The file contains invalid TOML syntax
595    /// - The file contains invalid configuration values
596    /// - Required fields are missing (e.g., `meta.name`)
597    ///
598    /// # Examples
599    ///
600    /// ```rust,no_run
601    /// use blz_core::ToolConfig;
602    /// use std::path::Path;
603    ///
604    /// // Load source-specific configuration
605    /// let config_path = Path::new("sources/react/settings.toml");
606    /// let tool_config = ToolConfig::load(config_path)?;
607    ///
608    /// println!("Source: {}", tool_config.meta.name);
609    /// if let Some(refresh) = tool_config.fetch.refresh_hours {
610    ///     println!("Custom refresh interval: {} hours", refresh);
611    /// }
612    /// # Ok::<(), blz_core::Error>(())
613    /// ```
614    pub fn load(path: &Path) -> Result<Self> {
615        let content = fs::read_to_string(path)
616            .map_err(|e| Error::Config(format!("Failed to read tool config: {e}")))?;
617        toml::from_str(&content)
618            .map_err(|e| Error::Config(format!("Failed to parse tool config: {e}")))
619    }
620
621    /// Save per-source configuration to a file.
622    ///
623    /// Serializes the configuration to TOML format and writes it to the specified path.
624    /// The parent directory must already exist.
625    ///
626    /// # Arguments
627    ///
628    /// * `path` - Path where to save the configuration file
629    ///
630    /// # Errors
631    ///
632    /// Returns an error if:
633    /// - The configuration cannot be serialized to TOML
634    /// - The parent directory doesn't exist
635    /// - The file cannot be written (permissions, disk space, I/O error)
636    ///
637    /// # Examples
638    ///
639    /// ```rust,no_run
640    /// use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
641    /// use std::path::Path;
642    ///
643    /// let config = ToolConfig {
644    ///     meta: ToolMeta {
645    ///         name: "my-docs".to_string(),
646    ///         display_name: Some("My Documentation".to_string()),
647    ///         homepage: None,
648    ///         repo: None,
649    ///     },
650    ///     fetch: FetchConfig {
651    ///         refresh_hours: Some(6),
652    ///         follow_links: None,
653    ///         allowlist: None,
654    ///     },
655    ///     index: IndexConfig {
656    ///         max_heading_block_lines: Some(300),
657    ///     },
658    /// };
659    ///
660    /// let config_path = Path::new("my-docs/settings.toml");
661    /// config.save(config_path)?;
662    /// # Ok::<(), blz_core::Error>(())
663    /// ```
664    pub fn save(&self, path: &Path) -> Result<()> {
665        let content = toml::to_string_pretty(self)
666            .map_err(|e| Error::Config(format!("Failed to serialize tool config: {e}")))?;
667        fs::write(path, content)
668            .map_err(|e| Error::Config(format!("Failed to write tool config: {e}")))?;
669        Ok(())
670    }
671}
672
673#[cfg(test)]
674#[allow(
675    clippy::panic,
676    clippy::disallowed_macros,
677    clippy::unwrap_used,
678    clippy::unnecessary_wraps
679)]
680mod tests {
681    use super::*;
682    use proptest::prelude::*;
683    use std::fs;
684    use tempfile::TempDir;
685
686    // Test fixtures
687    fn create_test_config() -> Config {
688        Config {
689            defaults: DefaultsConfig {
690                refresh_hours: 12,
691                max_archives: 5,
692                fetch_enabled: true,
693                follow_links: FollowLinks::Allowlist,
694                allowlist: vec!["example.com".to_string(), "docs.rs".to_string()],
695            },
696            paths: PathsConfig {
697                root: PathBuf::from("/tmp/test"),
698            },
699        }
700    }
701
702    fn create_test_tool_config() -> ToolConfig {
703        ToolConfig {
704            meta: ToolMeta {
705                name: "test-tool".to_string(),
706                display_name: Some("Test Tool".to_string()),
707                homepage: Some("https://test.com".to_string()),
708                repo: Some("https://github.com/test/tool".to_string()),
709            },
710            fetch: FetchConfig {
711                refresh_hours: Some(6),
712                follow_links: Some(FollowLinks::FirstParty),
713                allowlist: Some(vec!["allowed.com".to_string()]),
714            },
715            index: IndexConfig {
716                max_heading_block_lines: Some(100),
717            },
718        }
719    }
720
721    #[test]
722    fn test_default_config_values() {
723        // Given: Default configuration is requested
724        let config = Config::default();
725
726        // When: Examining default values
727        // Then: Should have sensible defaults
728        assert_eq!(config.defaults.refresh_hours, 24);
729        assert_eq!(config.defaults.max_archives, 10);
730        assert!(config.defaults.fetch_enabled);
731        assert!(matches!(
732            config.defaults.follow_links,
733            FollowLinks::FirstParty
734        ));
735        assert!(config.defaults.allowlist.is_empty());
736        assert!(!config.paths.root.as_os_str().is_empty());
737    }
738
739    #[test]
740    fn test_follow_links_serialization() -> Result<()> {
741        // Given: Different FollowLinks variants
742        let variants = vec![
743            FollowLinks::None,
744            FollowLinks::FirstParty,
745            FollowLinks::Allowlist,
746        ];
747
748        for variant in variants {
749            // When: Serializing and deserializing
750            let serialized = serde_json::to_string(&variant)?;
751            let deserialized: FollowLinks = serde_json::from_str(&serialized)?;
752
753            // Then: Should round-trip correctly
754            assert_eq!(variant, deserialized, "Round-trip failed for {variant:?}");
755        }
756        Ok(())
757    }
758
759    #[test]
760    fn test_config_save_and_load_roundtrip() -> Result<()> {
761        // Given: A temporary directory and test configuration
762        let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
763        let config_path = temp_dir.path().join("test_config.toml");
764        let original_config = create_test_config();
765
766        // When: Saving and then loading the configuration
767        let content = toml::to_string_pretty(&original_config)
768            .map_err(|e| Error::Config(format!("Failed to serialize: {e}")))?;
769        fs::write(&config_path, content)
770            .map_err(|e| Error::Config(format!("Failed to write: {e}")))?;
771
772        let loaded_config: Config = {
773            let content = fs::read_to_string(&config_path)
774                .map_err(|e| Error::Config(format!("Failed to read: {e}")))?;
775            toml::from_str(&content).map_err(|e| Error::Config(format!("Failed to parse: {e}")))?
776        };
777
778        // Then: Configurations should be identical
779        assert_eq!(
780            loaded_config.defaults.refresh_hours,
781            original_config.defaults.refresh_hours
782        );
783        assert_eq!(
784            loaded_config.defaults.max_archives,
785            original_config.defaults.max_archives
786        );
787        assert_eq!(
788            loaded_config.defaults.fetch_enabled,
789            original_config.defaults.fetch_enabled
790        );
791        assert_eq!(
792            loaded_config.defaults.allowlist,
793            original_config.defaults.allowlist
794        );
795        assert_eq!(loaded_config.paths.root, original_config.paths.root);
796
797        Ok(())
798    }
799
800    #[test]
801    fn test_config_load_missing_file() {
802        // Given: A non-existent config file path
803        let non_existent = PathBuf::from("/definitely/does/not/exist/config.toml");
804
805        // When: Attempting to load config
806        let result = (|| -> Result<Config> {
807            let content = fs::read_to_string(&non_existent)
808                .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
809            toml::from_str(&content)
810                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
811        })();
812
813        // Then: Should return appropriate error
814        assert!(result.is_err());
815        match result {
816            Err(Error::Config(msg)) => assert!(msg.contains("Failed to read config")),
817            _ => unreachable!("Expected Config error"),
818        }
819    }
820
821    #[test]
822    fn test_config_parse_invalid_toml() {
823        // Given: Invalid TOML content
824        let temp_dir = TempDir::new().expect("Failed to create temp dir");
825        let config_path = temp_dir.path().join("invalid.toml");
826        fs::write(&config_path, "this is not valid toml [[[").expect("Failed to write test file");
827
828        // When: Attempting to parse
829        let result = (|| -> Result<Config> {
830            let content = fs::read_to_string(&config_path)
831                .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
832            toml::from_str(&content)
833                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
834        })();
835
836        // Then: Should return parse error
837        assert!(result.is_err());
838        if let Err(Error::Config(msg)) = result {
839            assert!(msg.contains("Failed to parse config"));
840        } else {
841            panic!("Expected Config parse error");
842        }
843    }
844
845    #[test]
846    fn test_config_save_creates_directory() -> Result<()> {
847        // Given: A temporary directory and nested config path
848        let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
849        let nested_path = temp_dir
850            .path()
851            .join("nested")
852            .join("deeper")
853            .join("config.toml");
854        let config = create_test_config();
855
856        // When: Saving config to nested path (simulating Config::save logic)
857        let parent = nested_path
858            .parent()
859            .ok_or_else(|| Error::Config("Invalid config path".into()))?;
860        fs::create_dir_all(parent)
861            .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
862
863        let content = toml::to_string_pretty(&config)
864            .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
865        fs::write(&nested_path, content)
866            .map_err(|e| Error::Config(format!("Failed to write config: {e}")))?;
867
868        // Then: Directory should be created and file should exist
869        assert!(nested_path.exists());
870        assert!(
871            nested_path
872                .parent()
873                .expect("path should have parent")
874                .exists()
875        );
876
877        Ok(())
878    }
879
880    #[test]
881    fn test_tool_config_roundtrip() -> Result<()> {
882        // Given: A temporary file and test tool configuration
883        let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
884        let config_path = temp_dir.path().join("tool.toml");
885        let original_config = create_test_tool_config();
886
887        // When: Saving and loading the tool configuration
888        original_config.save(&config_path)?;
889        let loaded_config = ToolConfig::load(&config_path)?;
890
891        // Then: Configurations should be identical
892        assert_eq!(loaded_config.meta.name, original_config.meta.name);
893        assert_eq!(
894            loaded_config.meta.display_name,
895            original_config.meta.display_name
896        );
897        assert_eq!(loaded_config.meta.homepage, original_config.meta.homepage);
898        assert_eq!(loaded_config.meta.repo, original_config.meta.repo);
899        assert_eq!(
900            loaded_config.fetch.refresh_hours,
901            original_config.fetch.refresh_hours
902        );
903        assert_eq!(
904            loaded_config.fetch.allowlist,
905            original_config.fetch.allowlist
906        );
907        assert_eq!(
908            loaded_config.index.max_heading_block_lines,
909            original_config.index.max_heading_block_lines
910        );
911
912        Ok(())
913    }
914
915    #[test]
916    fn test_tool_config_load_nonexistent_file() {
917        // Given: A non-existent file path
918        let non_existent = PathBuf::from("/does/not/exist/tool.toml");
919
920        // When: Attempting to load
921        let result = ToolConfig::load(&non_existent);
922
923        // Then: Should return appropriate error
924        assert!(result.is_err());
925        if let Err(Error::Config(msg)) = result {
926            assert!(msg.contains("Failed to read tool config"));
927        } else {
928            panic!("Expected Config error");
929        }
930    }
931
932    #[test]
933    fn test_config_with_extreme_values() -> Result<()> {
934        // Given: Configuration with extreme but valid values (avoiding serialization limits)
935        let extreme_config = Config {
936            defaults: DefaultsConfig {
937                refresh_hours: 1_000_000, // Large but not MAX to avoid TOML issues
938                max_archives: 1_000_000,  // Large but not MAX to avoid TOML issues
939                fetch_enabled: false,
940                follow_links: FollowLinks::None,
941                allowlist: vec!["a".repeat(1000)], // Very long domain
942            },
943            paths: PathsConfig {
944                root: PathBuf::from("/".repeat(100)), // Very long path
945            },
946        };
947
948        // When: Serializing and deserializing
949        let serialized = toml::to_string_pretty(&extreme_config)
950            .map_err(|e| Error::Config(format!("Serialize failed: {e}")))?;
951        let deserialized: Config = toml::from_str(&serialized)
952            .map_err(|e| Error::Config(format!("Deserialize failed: {e}")))?;
953
954        // Then: Should handle extreme values correctly
955        assert_eq!(deserialized.defaults.refresh_hours, 1_000_000);
956        assert_eq!(deserialized.defaults.max_archives, 1_000_000);
957        assert!(!deserialized.defaults.fetch_enabled);
958        assert_eq!(deserialized.defaults.allowlist.len(), 1);
959        assert_eq!(deserialized.defaults.allowlist[0].len(), 1000);
960
961        Ok(())
962    }
963
964    #[test]
965    fn test_config_empty_allowlist() -> Result<()> {
966        // Given: Configuration with empty allowlist
967        let config = Config {
968            defaults: DefaultsConfig {
969                refresh_hours: 24,
970                max_archives: 10,
971                fetch_enabled: true,
972                follow_links: FollowLinks::Allowlist,
973                allowlist: vec![], // Empty allowlist
974            },
975            paths: PathsConfig {
976                root: PathBuf::from("/tmp"),
977            },
978        };
979
980        // When: Serializing and deserializing
981        let serialized = toml::to_string_pretty(&config)?;
982        let deserialized: Config = toml::from_str(&serialized)?;
983
984        // Then: Empty allowlist should be preserved
985        assert!(deserialized.defaults.allowlist.is_empty());
986        assert!(matches!(
987            deserialized.defaults.follow_links,
988            FollowLinks::Allowlist
989        ));
990
991        Ok(())
992    }
993
994    // Property-based tests
995    proptest! {
996        #[test]
997        fn test_config_refresh_hours_roundtrip(refresh_hours in 1u32..=365*24) {
998            let config = Config {
999                defaults: DefaultsConfig {
1000                    refresh_hours,
1001                    max_archives: 10,
1002                    fetch_enabled: true,
1003                    follow_links: FollowLinks::FirstParty,
1004                    allowlist: vec![],
1005                },
1006                paths: PathsConfig {
1007                    root: PathBuf::from("/tmp"),
1008                },
1009            };
1010
1011            let serialized = toml::to_string_pretty(&config).expect("should serialize");
1012            let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1013
1014            prop_assert_eq!(deserialized.defaults.refresh_hours, refresh_hours);
1015        }
1016
1017        #[test]
1018        fn test_config_max_archives_roundtrip(max_archives in 1usize..=1000) {
1019            let config = Config {
1020                defaults: DefaultsConfig {
1021                    refresh_hours: 24,
1022                    max_archives,
1023                    fetch_enabled: true,
1024                    follow_links: FollowLinks::FirstParty,
1025                    allowlist: vec![],
1026                },
1027                paths: PathsConfig {
1028                    root: PathBuf::from("/tmp"),
1029                },
1030            };
1031
1032            let serialized = toml::to_string_pretty(&config).expect("should serialize");
1033            let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1034
1035            prop_assert_eq!(deserialized.defaults.max_archives, max_archives);
1036        }
1037
1038        #[test]
1039        fn test_config_allowlist_roundtrip(allowlist in prop::collection::vec(r"[a-z0-9\.-]+", 0..=10)) {
1040            let config = Config {
1041                defaults: DefaultsConfig {
1042                    refresh_hours: 24,
1043                    max_archives: 10,
1044                    fetch_enabled: true,
1045                    follow_links: FollowLinks::Allowlist,
1046                    allowlist: allowlist.clone(),
1047                },
1048                paths: PathsConfig {
1049                    root: PathBuf::from("/tmp"),
1050                },
1051            };
1052
1053            let serialized = toml::to_string_pretty(&config).expect("should serialize");
1054            let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1055
1056            prop_assert_eq!(deserialized.defaults.allowlist, allowlist);
1057        }
1058    }
1059
1060    /*
1061    // Security-focused tests
1062    #[test]
1063    fn test_config_path_traversal_prevention() {
1064            // Given: Config with potentially malicious paths
1065            let malicious_paths = vec![
1066                "../../../etc/passwd",
1067                "..\\..\\..\\windows\\system32",
1068                "/etc/shadow",
1069                "../../.ssh/id_rsa",
1070            ];
1071
1072            for malicious_path in malicious_paths {
1073                // When: Creating config with malicious path
1074                let config = Config {
1075                    defaults: DefaultsConfig {
1076                        refresh_hours: 24,
1077                        max_archives: 10,
1078                        fetch_enabled: true,
1079                        follow_links: FollowLinks::FirstParty,
1080                        allowlist: vec![],
1081                    },
1082                    paths: PathsConfig {
1083                        root: PathBuf::from(malicious_path),
1084                    },
1085                };
1086
1087                // Then: Should still serialize/deserialize (path validation is separate)
1088                let serialized = toml::to_string_pretty(&config).expect("should serialize");
1089                let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1090                assert_eq!(deserialized.paths.root, PathBuf::from(malicious_path));
1091            }
1092        }
1093
1094        #[test]
1095        fn test_config_malicious_toml_injection() {
1096            // Given: Potentially malicious TOML strings that could break parsing
1097            let malicious_strings = vec![
1098                "\n[malicious]\nkey = \"value\"",
1099                "\"quotes\"in\"weird\"places",
1100                "key = \"value\"\n[new_section]",
1101                "unicode = \"\\u0000\\u0001\\u0002\"",
1102            ];
1103
1104            for malicious_string in malicious_strings {
1105                // When: Setting allowlist with potentially malicious content
1106                let config = Config {
1107                    defaults: DefaultsConfig {
1108                        refresh_hours: 24,
1109                        max_archives: 10,
1110                        fetch_enabled: true,
1111                        follow_links: FollowLinks::Allowlist,
1112                        allowlist: vec![malicious_string.to_string()],
1113                    },
1114                    paths: PathsConfig {
1115                        root: PathBuf::from("/tmp"),
1116                    },
1117                };
1118
1119                // Then: Should serialize safely (TOML library handles escaping)
1120                let result = toml::to_string_pretty(&config);
1121                assert!(
1122                    result.is_ok(),
1123                    "Failed to serialize config with: {malicious_string}"
1124                );
1125
1126                if let Ok(serialized) = result {
1127                    let deserialized_result: std::result::Result<Config, _> =
1128                        toml::from_str(&serialized);
1129                    assert!(
1130                        deserialized_result.is_ok(),
1131                        "Failed to deserialize config with: {malicious_string}"
1132                    );
1133                }
1134            }
1135        }
1136
1137        #[test]
1138        fn test_config_unicode_handling() -> Result<()> {
1139            // Given: Configuration with Unicode content
1140            let unicode_config = Config {
1141                defaults: DefaultsConfig {
1142                    refresh_hours: 24,
1143                    max_archives: 10,
1144                    fetch_enabled: true,
1145                    follow_links: FollowLinks::Allowlist,
1146                    allowlist: vec![
1147                        "例え.com".to_string(),    // Japanese
1148                        "مثال.com".to_string(),    // Arabic
1149                        "пример.com".to_string(),  // Cyrillic
1150                        "🚀.test.com".to_string(), // Emoji
1151                    ],
1152                },
1153                paths: PathsConfig {
1154                    root: PathBuf::from("/tmp/测试"), // Chinese characters
1155                },
1156            };
1157
1158            // When: Serializing and deserializing
1159            let serialized = toml::to_string_pretty(&unicode_config)?;
1160            let deserialized: Config = toml::from_str(&serialized)?;
1161
1162            // Then: Unicode should be preserved correctly
1163            assert_eq!(deserialized.defaults.allowlist.len(), 4);
1164            assert!(
1165                deserialized
1166                    .defaults
1167                    .allowlist
1168                    .contains(&"例え.com".to_string())
1169            );
1170            assert!(
1171                deserialized
1172                    .defaults
1173                    .allowlist
1174                    .contains(&"🚀.test.com".to_string())
1175            );
1176            assert_eq!(deserialized.paths.root, PathBuf::from("/tmp/测试"));
1177
1178            Ok(())
1179        }
1180
1181        #[test]
1182        fn test_config_edge_case_empty_values() -> Result<()> {
1183            // Given: Configuration with empty values
1184            let empty_config = Config {
1185                defaults: DefaultsConfig {
1186                    refresh_hours: 0, // Edge case: zero refresh
1187                    max_archives: 0,  // Edge case: no archives
1188                    fetch_enabled: false,
1189                    follow_links: FollowLinks::None,
1190                    allowlist: vec![String::new()], // Empty string in allowlist
1191                },
1192                paths: PathsConfig {
1193                    root: PathBuf::from(""), // Empty path
1194                },
1195            };
1196
1197            // When: Serializing and deserializing
1198            let serialized = toml::to_string_pretty(&empty_config)?;
1199            let deserialized: Config = toml::from_str(&serialized)?;
1200
1201            // Then: Empty/zero values should be handled correctly
1202            assert_eq!(deserialized.defaults.refresh_hours, 0);
1203            assert_eq!(deserialized.defaults.max_archives, 0);
1204            assert_eq!(deserialized.defaults.allowlist.len(), 1);
1205            assert_eq!(deserialized.defaults.allowlist[0], "");
1206            assert_eq!(deserialized.paths.root, PathBuf::from(""));
1207
1208            Ok(())
1209        }
1210    }
1211    */
1212}