Skip to main content

blz_core/
config.rs

1//! Configuration management for blz cache system.
2//!
3//! This module provides hierarchical configuration with global defaults and per-source overrides.
4//! Configuration is stored in TOML format and supports environment variable overrides.
5//!
6//! ## Configuration Hierarchy
7//!
8//! 1. **Global config**: Platform-specific config directory (see `GlobalConfig` docs)
9//! 2. **Per-source config**: `<source_dir>/settings.toml`
10//! 3. **Environment variables**: `BLZ_*` prefix (e.g. `BLZ_REFRESH_HOURS`, `BLZ_ROOT`)
11//!
12//! ## Examples
13//!
14//! ### Loading global configuration:
15//!
16//! ```rust
17//! use blz_core::{Config, Result};
18//!
19//! // Load from default location or create with defaults
20//! let config = Config::load()?;
21//! println!("Cache root: {}", config.paths.root.display());
22//! println!("Refresh interval: {} hours", config.defaults.refresh_hours);
23//! # Ok::<(), blz_core::Error>(())
24//! ```
25//!
26//! ### Working with tool-specific configuration:
27//!
28//! ```rust,no_run
29//! use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
30//! use std::path::Path;
31//!
32//! let tool_config = ToolConfig {
33//!     meta: ToolMeta {
34//!         name: "react".to_string(),
35//!         display_name: Some("React Documentation".to_string()),
36//!         homepage: Some("https://react.dev".to_string()),
37//!         repo: Some("https://github.com/facebook/react".to_string()),
38//!     },
39//!     fetch: FetchConfig {
40//!         refresh_hours: Some(12), // Override global default
41//!         follow_links: None,      // Use global default
42//!         allowlist: None,         // Use global default
43//!     },
44//!     index: IndexConfig {
45//!         max_heading_block_lines: Some(500),
46//!         filter_non_english: None, // Use global default
47//!     },
48//! };
49//!
50//! // Save to file
51//! tool_config.save(Path::new("react/settings.toml"))?;
52//! # Ok::<(), blz_core::Error>(())
53//! ```
54
55use crate::{Error, Result, profile};
56use serde::{Deserialize, Serialize};
57use std::fs;
58use std::path::{Path, PathBuf};
59
/// Serde fallback for the `filter_non_english` key of `DefaultsConfig`.
///
/// Always yields `true`: when a config file omits the key, non-English
/// filtering stays switched on, which keeps the pre-existing behavior.
const fn default_filter_non_english() -> bool {
    true
}
67
68/// Global configuration for the blz cache system.
69///
70/// Contains default settings that apply to all sources unless overridden by per-source configuration.
71/// Configuration is automatically loaded from the system config directory or created with sensible defaults.
72///
73/// ## File Location
74///
75/// The configuration file is stored at (searched in order):
76/// - XDG: `$XDG_CONFIG_HOME/blz/config.toml` or `~/.config/blz/config.toml`
77/// - Dotfile fallback: `~/.blz/config.toml`
78///
79/// A `config.local.toml` in the same directory overrides keys from `config.toml`.
80///
81/// ## Example Configuration File
82///
83/// ```toml
84/// [defaults]
85/// refresh_hours = 24
86/// max_archives = 10
87/// fetch_enabled = true
88/// follow_links = "first_party"
89/// allowlist = ["docs.rs", "developer.mozilla.org"]
90///
91/// [paths]
92/// root = "/home/user/.outfitter/blz"
93/// ```
94#[derive(Debug, Clone, Serialize, Deserialize)]
95pub struct Config {
96    /// Default settings for all sources
97    pub defaults: DefaultsConfig,
98    /// File system paths configuration
99    pub paths: PathsConfig,
100}
101
102/// Default settings that apply to all sources unless overridden.
103///
104/// These settings control fetching behavior, caching policies, and link following rules.
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct DefaultsConfig {
107    /// How often to refresh cached content (in hours).
108    ///
109    /// Sources are only re-fetched if they haven't been updated within this interval.
110    /// Set to 0 to always fetch on access.
111    pub refresh_hours: u32,
112
113    /// Maximum number of archived versions to keep per source.
114    ///
115    /// When a source is updated, the previous version is archived. This setting
116    /// controls how many historical versions to retain for diff generation.
117    pub max_archives: usize,
118
119    /// Whether fetching from remote sources is enabled.
120    ///
121    /// When disabled, only locally cached content is used. Useful for offline work
122    /// or environments with restricted network access.
123    pub fetch_enabled: bool,
124
125    /// Policy for following links in llms.txt files.
126    ///
127    /// Controls whether and which external links should be followed when processing
128    /// llms.txt files that contain references to other documentation sources.
129    pub follow_links: FollowLinks,
130
131    /// Domains allowed for link following.
132    ///
133    /// Only used when `follow_links` is set to `Allowlist`. Links to domains
134    /// not in this list will be ignored.
135    pub allowlist: Vec<String>,
136
137    /// Default language filtering behavior.
138    ///
139    /// When `true`, non-English content is filtered during document processing.
140    /// When `false`, all content is retained regardless of language.
141    /// Defaults to `true` for backward compatibility.
142    #[serde(default = "default_filter_non_english")]
143    pub filter_non_english: bool,
144}
145
146/// Policy for following external links in llms.txt files.
147///
148/// This controls how the system handles links to other documentation sources
149/// within llms.txt files.
150#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
151#[serde(rename_all = "snake_case")]
152pub enum FollowLinks {
153    /// Never follow external links.
154    ///
155    /// Only process the original llms.txt file, ignoring any links to other sources.
156    None,
157
158    /// Follow links to the same domain and its immediate subdomains.
159    ///
160    /// For example, if processing `docs.example.com/llms.txt`, links to
161    /// `api.example.com/docs` or `example.com/guide` would be followed,
162    /// but `other-site.com/docs` would be ignored.
163    FirstParty,
164
165    /// Only follow links to domains in the allowlist.
166    ///
167    /// Use the `allowlist` field in `DefaultsConfig` to specify which domains
168    /// are permitted. This provides fine-grained control over which external
169    /// sources are trusted.
170    Allowlist,
171}
172
173/// File system paths configuration.
174///
175/// Defines where cached content, indices, and metadata are stored on the local filesystem.
176#[derive(Debug, Clone, Serialize, Deserialize)]
177pub struct PathsConfig {
178    /// Root directory for all cached content.
179    ///
180    /// Each source gets its own subdirectory under this root. The directory
181    /// structure is: `root/<source_alias>/`
182    ///
183    /// Default locations:
184    /// - Linux: `~/.local/share/blz`
185    /// - macOS: `~/Library/Application Support/dev.outfitter.blz`
186    /// - Windows: `%APPDATA%\outfitter\blz`
187    pub root: PathBuf,
188}
189
190impl Config {
191    /// Load configuration from the default location or create with defaults.
192    ///
193    /// This method attempts to load the configuration from the system config directory.
194    /// If the file doesn't exist, it returns a configuration with sensible defaults.
195    /// If the file exists but is malformed, it returns an error.
196    ///
197    /// # Returns
198    ///
199    /// Returns the loaded configuration or a default configuration if no file exists.
200    ///
201    /// # Errors
202    ///
203    /// Returns an error if:
204    /// - The config directory cannot be determined (unsupported platform)
205    /// - The config file exists but cannot be read (permissions, I/O error)
206    /// - The config file exists but contains invalid TOML syntax
207    /// - The config file exists but contains invalid configuration values
208    ///
209    /// # Examples
210    ///
211    /// ```rust
212    /// use blz_core::Config;
213    ///
214    /// // Load existing config or create with defaults
215    /// let config = Config::load()?;
216    ///
217    /// if config.defaults.fetch_enabled {
218    ///     println!("Fetching is enabled");
219    /// }
220    /// # Ok::<(), blz_core::Error>(())
221    /// ```
222    pub fn load() -> Result<Self> {
223        // Determine base config path (BLZ_CONFIG/BLZ_CONFIG_DIR, XDG, dotfile), or use defaults
224        let base_path = Self::existing_config_path()?;
225
226        // Load base
227        let mut base_value: toml::Value = if let Some(ref path) = base_path {
228            let content = fs::read_to_string(path)
229                .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
230            toml::from_str(&content)
231                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))?
232        } else {
233            let default_str = toml::to_string(&Self::default())
234                .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?;
235            toml::from_str(&default_str)
236                .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?
237        };
238
239        // Merge optional local override next to resolved base directory
240        let base_dir = base_path.as_deref().map_or_else(
241            || {
242                Self::canonical_config_path().map_or_else(
243                    |_| PathBuf::new(),
244                    |p| p.parent().map(Path::to_path_buf).unwrap_or_default(),
245                )
246            },
247            |bp| bp.parent().map(Path::to_path_buf).unwrap_or_default(),
248        );
249
250        let local_path = base_dir.join("config.local.toml");
251        if local_path.exists() {
252            let content = fs::read_to_string(&local_path)
253                .map_err(|e| Error::Config(format!("Failed to read local config: {e}")))?;
254            let local_value: toml::Value = toml::from_str(&content)
255                .map_err(|e| Error::Config(format!("Failed to parse local config: {e}")))?;
256            Self::merge_toml(&mut base_value, &local_value);
257        }
258
259        // Deserialize
260        let mut config: Self = base_value
261            .try_into()
262            .map_err(|e| Error::Config(format!("Failed to materialize config: {e}")))?;
263
264        // Apply env overrides
265        config.apply_env_overrides();
266
267        Ok(config)
268    }
269
270    /// Save the configuration to the default location.
271    ///
272    /// This method serializes the configuration to TOML format and writes it to
273    /// the system config directory. Parent directories are created if they don't exist.
274    ///
275    /// # Errors
276    ///
277    /// Returns an error if:
278    /// - The config directory cannot be determined (unsupported platform)
279    /// - Parent directories cannot be created (permissions, disk space)
280    /// - The configuration cannot be serialized to TOML
281    /// - The file cannot be written (permissions, disk space, I/O error)
282    ///
283    /// # Examples
284    ///
285    /// ```rust,no_run
286    /// use blz_core::{Config, DefaultsConfig, PathsConfig, FollowLinks};
287    /// use std::path::PathBuf;
288    ///
289    /// let mut config = Config::load()?;
290    /// config.defaults.refresh_hours = 12; // Update refresh interval
291    /// config.save()?; // Persist changes
292    /// # Ok::<(), blz_core::Error>(())
293    /// ```
294    pub fn save(&self) -> Result<()> {
295        let config_path = Self::save_target_path()?;
296        let parent = config_path
297            .parent()
298            .ok_or_else(|| Error::Config("Invalid config path".into()))?;
299
300        fs::create_dir_all(parent)
301            .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
302
303        let content = toml::to_string_pretty(self)
304            .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
305
306        let tmp = parent.join("config.toml.tmp");
307        fs::write(&tmp, &content)
308            .map_err(|e| Error::Config(format!("Failed to write temp config: {e}")))?;
309        // Best-effort atomic replace; on Windows, rename() replaces if target does not exist.
310        // SAFETY: config.toml write is replaced in one step to avoid torn files.
311        #[cfg(target_os = "windows")]
312        if config_path.exists() {
313            fs::remove_file(&config_path)
314                .map_err(|e| Error::Config(format!("Failed to remove existing config: {e}")))?;
315        }
316        std::fs::rename(&tmp, &config_path)
317            .map_err(|e| Error::Config(format!("Failed to replace config: {e}")))?;
318
319        Ok(())
320    }
321
322    /// Get the path where the global configuration file is stored.
323    ///
324    /// Uses the system-appropriate config directory based on the platform:
325    /// - Linux: `~/.config/blz/global.toml`
326    /// - macOS: `~/Library/Application Support/dev.outfitter.blz/global.toml`
327    /// - Windows: `%APPDATA%\outfitter\blz\global.toml`
328    ///
329    /// # Errors
330    ///
331    /// Returns an error if the system config directory cannot be determined,
332    /// which may happen on unsupported platforms or in sandboxed environments.
333    fn canonical_config_path() -> Result<PathBuf> {
334        let xdg = std::env::var("XDG_CONFIG_HOME")
335            .ok()
336            .map(PathBuf::from)
337            .or_else(|| directories::BaseDirs::new().map(|b| b.home_dir().join(".config")))
338            .ok_or_else(|| Error::Config("Failed to determine XDG config directory".into()))?;
339        Ok(xdg.join(profile::app_dir_slug()).join("config.toml"))
340    }
341
342    fn dotfile_config_path() -> Result<PathBuf> {
343        let home = directories::BaseDirs::new()
344            .map(|b| b.home_dir().to_path_buf())
345            .ok_or_else(|| Error::Config("Failed to determine home directory".into()))?;
346        Ok(home.join(profile::dot_dir_slug()).join("config.toml"))
347    }
348
349    fn existing_config_path() -> Result<Option<PathBuf>> {
350        // 1) BLZ_CONFIG (file)
351        if let Ok(explicit) = std::env::var("BLZ_CONFIG") {
352            let explicit = explicit.trim();
353            if !explicit.is_empty() {
354                let p = PathBuf::from(explicit);
355                if p.is_file() && p.exists() {
356                    return Ok(Some(p));
357                }
358            }
359        }
360
361        // 2) BLZ_CONFIG_DIR (dir)
362        if let Ok(dir) = std::env::var("BLZ_CONFIG_DIR") {
363            let dir = dir.trim();
364            if !dir.is_empty() {
365                let p = PathBuf::from(dir).join("config.toml");
366                if p.is_file() && p.exists() {
367                    return Ok(Some(p));
368                }
369            }
370        }
371
372        // 3) XDG
373        let xdg = Self::canonical_config_path()?;
374        if xdg.exists() {
375            return Ok(Some(xdg));
376        }
377        // 4) Dotfile
378        let dot = Self::dotfile_config_path()?;
379        if dot.exists() {
380            return Ok(Some(dot));
381        }
382        Ok(None)
383    }
384
385    fn save_target_path() -> Result<PathBuf> {
386        if let Some(existing) = Self::existing_config_path()? {
387            return Ok(existing);
388        }
389        Self::canonical_config_path()
390    }
391
392    fn merge_toml(dst: &mut toml::Value, src: &toml::Value) {
393        use toml::Value::Table;
394        match (dst, src) {
395            (Table(dst_tbl), Table(src_tbl)) => {
396                for (k, v) in src_tbl {
397                    match dst_tbl.get_mut(k) {
398                        Some(dst_v) => Self::merge_toml(dst_v, v),
399                        None => {
400                            dst_tbl.insert(k.clone(), v.clone());
401                        },
402                    }
403                }
404            },
405            (dst_v, src_v) => *dst_v = src_v.clone(),
406        }
407    }
408
409    fn apply_env_overrides(&mut self) {
410        if let Ok(v) = std::env::var("BLZ_REFRESH_HOURS") {
411            if let Ok(n) = v.parse::<u32>() {
412                self.defaults.refresh_hours = n;
413            }
414        }
415        if let Ok(v) = std::env::var("BLZ_MAX_ARCHIVES") {
416            if let Ok(n) = v.parse::<usize>() {
417                self.defaults.max_archives = n;
418            }
419        }
420        if let Ok(v) = std::env::var("BLZ_FETCH_ENABLED") {
421            let norm = v.to_ascii_lowercase();
422            self.defaults.fetch_enabled = matches!(norm.as_str(), "1" | "true" | "yes" | "on");
423        }
424        if let Ok(v) = std::env::var("BLZ_FOLLOW_LINKS") {
425            match v.to_ascii_lowercase().as_str() {
426                "none" => self.defaults.follow_links = FollowLinks::None,
427                "first_party" | "firstparty" => {
428                    self.defaults.follow_links = FollowLinks::FirstParty;
429                },
430                "allowlist" => self.defaults.follow_links = FollowLinks::Allowlist,
431                _ => {},
432            }
433        }
434        if let Ok(v) = std::env::var("BLZ_ALLOWLIST") {
435            let list = v
436                .split(',')
437                .map(|s| s.trim().to_string())
438                .filter(|s| !s.is_empty())
439                .collect::<Vec<_>>();
440            if !list.is_empty() {
441                self.defaults.allowlist = list;
442            }
443        }
444        if let Ok(v) = std::env::var("BLZ_ROOT") {
445            let p = PathBuf::from(v);
446            if !p.as_os_str().is_empty() {
447                self.paths.root = p;
448            }
449        }
450    }
451}
452
453impl Default for Config {
454    fn default() -> Self {
455        Self {
456            defaults: DefaultsConfig {
457                refresh_hours: 24,
458                max_archives: 10,
459                fetch_enabled: true,
460                follow_links: FollowLinks::FirstParty,
461                allowlist: Vec::new(),
462                filter_non_english: true,
463            },
464            paths: PathsConfig {
465                root: directories::ProjectDirs::from("dev", "outfitter", profile::app_dir_slug())
466                    .map_or_else(
467                        || {
468                            // Expand home directory properly
469                            directories::BaseDirs::new().map_or_else(
470                                || PathBuf::from(".outfitter").join(profile::app_dir_slug()),
471                                |base| {
472                                    base.home_dir()
473                                        .join(".outfitter")
474                                        .join(profile::app_dir_slug())
475                                },
476                            )
477                        },
478                        |dirs| dirs.data_dir().to_path_buf(),
479                    ),
480            },
481        }
482    }
483}
484
485/// Per-source configuration that overrides global defaults.
486///
487/// Each documentation source can have its own configuration file (`settings.toml`)
488/// that overrides the global configuration for that specific source. This allows
489/// fine-grained control over fetching behavior, indexing parameters, and metadata.
490///
491/// ## File Location
492///
493/// Stored as `<cache_root>/<source_alias>/settings.toml`
494///
495/// ## Example Configuration File
496///
497/// ```toml
498/// [meta]
499/// name = "react"
500/// display_name = "React Documentation"
501/// homepage = "https://react.dev"
502/// repo = "https://github.com/facebook/react"
503///
504/// [fetch]
505/// refresh_hours = 12  # Override global default
506/// follow_links = "first_party"
507/// allowlist = ["reactjs.org", "react.dev"]
508///
509/// [index]
510/// max_heading_block_lines = 500
511/// ```
512#[derive(Debug, Clone, Serialize, Deserialize)]
513pub struct ToolConfig {
514    /// Metadata about the documentation source
515    pub meta: ToolMeta,
516    /// Fetching behavior overrides
517    pub fetch: FetchConfig,
518    /// Indexing parameter overrides
519    pub index: IndexConfig,
520}
521
522/// Metadata about a documentation source.
523///
524/// This information is used for display purposes and to provide context
525/// about the source of documentation being cached.
526#[derive(Debug, Clone, Serialize, Deserialize)]
527pub struct ToolMeta {
528    /// Unique identifier for this source (used as directory name).
529    ///
530    /// Should be a valid filename that uniquely identifies the source.
531    /// Typically lowercase with hyphens (e.g., "react", "node-js", "rust-std").
532    pub name: String,
533
534    /// Human-readable display name for the source.
535    ///
536    /// Used in search results and UI displays. If not provided, the `name`
537    /// field is used as fallback.
538    pub display_name: Option<String>,
539
540    /// Homepage URL for the documentation source.
541    ///
542    /// The main website or documentation portal for this source.
543    /// Used for reference and linking back to the original documentation.
544    pub homepage: Option<String>,
545
546    /// Repository URL for the documentation source.
547    ///
548    /// Link to the source code repository, if available. Useful for
549    /// understanding the project context and accessing source code.
550    pub repo: Option<String>,
551}
552
553/// Per-source fetching behavior overrides.
554///
555/// These settings override the global defaults for fetching behavior.
556/// Any `None` values will use the corresponding global default setting.
557#[derive(Debug, Clone, Serialize, Deserialize)]
558pub struct FetchConfig {
559    /// Override for refresh interval in hours.
560    ///
561    /// If `Some`, overrides the global `refresh_hours` setting for this source.
562    /// If `None`, uses the global default.
563    pub refresh_hours: Option<u32>,
564
565    /// Override for link following policy.
566    ///
567    /// If `Some`, overrides the global `follow_links` setting for this source.
568    /// If `None`, uses the global default.
569    pub follow_links: Option<FollowLinks>,
570
571    /// Override for allowed domains list.
572    ///
573    /// If `Some`, overrides the global `allowlist` setting for this source.
574    /// If `None`, uses the global default. Only used when `follow_links` is `Allowlist`.
575    pub allowlist: Option<Vec<String>>,
576}
577
578/// Per-source indexing parameter overrides.
579///
580/// These settings control how the documentation is processed and indexed
581/// for this specific source, overriding global defaults where specified.
582#[derive(Debug, Clone, Serialize, Deserialize)]
583pub struct IndexConfig {
584    /// Maximum lines to include in a single heading block.
585    ///
586    /// Controls how large sections are broken up during indexing. Larger values
587    /// include more context but may reduce search precision. Smaller values
588    /// provide more focused results but may split related content.
589    ///
590    /// If `None`, uses a sensible default based on content analysis.
591    pub max_heading_block_lines: Option<usize>,
592
593    /// Override language filtering for this source.
594    ///
595    /// If `Some(true)`, non-English content will be filtered regardless of global default.
596    /// If `Some(false)`, all content will be retained regardless of global default.
597    /// If `None`, uses the global `filter_non_english` setting.
598    pub filter_non_english: Option<bool>,
599}
600
601impl ToolConfig {
602    /// Load per-source configuration from a file.
603    ///
604    /// Loads and parses a TOML configuration file for a specific documentation source.
605    /// The file should contain sections for `[meta]`, `[fetch]`, and `[index]`.
606    ///
607    /// # Arguments
608    ///
609    /// * `path` - Path to the configuration file (typically `settings.toml`)
610    ///
611    /// # Returns
612    ///
613    /// Returns the parsed configuration.
614    ///
615    /// # Errors
616    ///
617    /// Returns an error if:
618    /// - The file cannot be read (doesn't exist, permissions, I/O error)
619    /// - The file contains invalid TOML syntax
620    /// - The file contains invalid configuration values
621    /// - Required fields are missing (e.g., `meta.name`)
622    ///
623    /// # Examples
624    ///
625    /// ```rust,no_run
626    /// use blz_core::ToolConfig;
627    /// use std::path::Path;
628    ///
629    /// // Load source-specific configuration
630    /// let config_path = Path::new("sources/react/settings.toml");
631    /// let tool_config = ToolConfig::load(config_path)?;
632    ///
633    /// println!("Source: {}", tool_config.meta.name);
634    /// if let Some(refresh) = tool_config.fetch.refresh_hours {
635    ///     println!("Custom refresh interval: {} hours", refresh);
636    /// }
637    /// # Ok::<(), blz_core::Error>(())
638    /// ```
639    pub fn load(path: &Path) -> Result<Self> {
640        let content = fs::read_to_string(path)
641            .map_err(|e| Error::Config(format!("Failed to read tool config: {e}")))?;
642        toml::from_str(&content)
643            .map_err(|e| Error::Config(format!("Failed to parse tool config: {e}")))
644    }
645
646    /// Save per-source configuration to a file.
647    ///
648    /// Serializes the configuration to TOML format and writes it to the specified path.
649    /// The parent directory must already exist.
650    ///
651    /// # Arguments
652    ///
653    /// * `path` - Path where to save the configuration file
654    ///
655    /// # Errors
656    ///
657    /// Returns an error if:
658    /// - The configuration cannot be serialized to TOML
659    /// - The parent directory doesn't exist
660    /// - The file cannot be written (permissions, disk space, I/O error)
661    ///
662    /// # Examples
663    ///
664    /// ```rust,no_run
665    /// use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
666    /// use std::path::Path;
667    ///
668    /// let config = ToolConfig {
669    ///     meta: ToolMeta {
670    ///         name: "my-docs".to_string(),
671    ///         display_name: Some("My Documentation".to_string()),
672    ///         homepage: None,
673    ///         repo: None,
674    ///     },
675    ///     fetch: FetchConfig {
676    ///         refresh_hours: Some(6),
677    ///         follow_links: None,
678    ///         allowlist: None,
679    ///     },
680    ///     index: IndexConfig {
681    ///         max_heading_block_lines: Some(300),
682    ///         filter_non_english: None,
683    ///     },
684    /// };
685    ///
686    /// let config_path = Path::new("my-docs/settings.toml");
687    /// config.save(config_path)?;
688    /// # Ok::<(), blz_core::Error>(())
689    /// ```
690    pub fn save(&self, path: &Path) -> Result<()> {
691        let content = toml::to_string_pretty(self)
692            .map_err(|e| Error::Config(format!("Failed to serialize tool config: {e}")))?;
693        fs::write(path, content)
694            .map_err(|e| Error::Config(format!("Failed to write tool config: {e}")))?;
695        Ok(())
696    }
697}
698
699#[cfg(test)]
700#[allow(
701    clippy::panic,
702    clippy::disallowed_macros,
703    clippy::unwrap_used,
704    clippy::unnecessary_wraps
705)]
706mod tests {
707    use super::*;
708    use proptest::prelude::*;
709    use std::fs;
710    use tempfile::TempDir;
711
712    // Test fixtures
713    fn create_test_config() -> Config {
714        Config {
715            defaults: DefaultsConfig {
716                refresh_hours: 12,
717                max_archives: 5,
718                fetch_enabled: true,
719                follow_links: FollowLinks::Allowlist,
720                allowlist: vec!["example.com".to_string(), "docs.rs".to_string()],
721                filter_non_english: true,
722            },
723            paths: PathsConfig {
724                root: PathBuf::from("/tmp/test"),
725            },
726        }
727    }
728
729    fn create_test_tool_config() -> ToolConfig {
730        ToolConfig {
731            meta: ToolMeta {
732                name: "test-tool".to_string(),
733                display_name: Some("Test Tool".to_string()),
734                homepage: Some("https://test.com".to_string()),
735                repo: Some("https://github.com/test/tool".to_string()),
736            },
737            fetch: FetchConfig {
738                refresh_hours: Some(6),
739                follow_links: Some(FollowLinks::FirstParty),
740                allowlist: Some(vec!["allowed.com".to_string()]),
741            },
742            index: IndexConfig {
743                max_heading_block_lines: Some(100),
744                filter_non_english: None,
745            },
746        }
747    }
748
749    #[test]
750    fn test_default_config_values() {
751        // Given: Default configuration is requested
752        let config = Config::default();
753
754        // When: Examining default values
755        // Then: Should have sensible defaults
756        assert_eq!(config.defaults.refresh_hours, 24);
757        assert_eq!(config.defaults.max_archives, 10);
758        assert!(config.defaults.fetch_enabled);
759        assert!(matches!(
760            config.defaults.follow_links,
761            FollowLinks::FirstParty
762        ));
763        assert!(config.defaults.allowlist.is_empty());
764        assert!(config.defaults.filter_non_english);
765        assert!(!config.paths.root.as_os_str().is_empty());
766    }
767
768    #[test]
769    fn test_follow_links_serialization() -> Result<()> {
770        // Given: Different FollowLinks variants
771        let variants = vec![
772            FollowLinks::None,
773            FollowLinks::FirstParty,
774            FollowLinks::Allowlist,
775        ];
776
777        for variant in variants {
778            // When: Serializing and deserializing
779            let serialized = serde_json::to_string(&variant)?;
780            let deserialized: FollowLinks = serde_json::from_str(&serialized)?;
781
782            // Then: Should round-trip correctly
783            assert_eq!(variant, deserialized, "Round-trip failed for {variant:?}");
784        }
785        Ok(())
786    }
787
#[test]
fn test_config_save_and_load_roundtrip() -> Result<()> {
    // Given: the test configuration rendered as TOML and written into a
    // scratch directory that is removed when `scratch` is dropped.
    let scratch = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
    let toml_file = scratch.path().join("test_config.toml");
    let expected = create_test_config();

    let rendered = toml::to_string_pretty(&expected)
        .map_err(|e| Error::Config(format!("Failed to serialize: {e}")))?;
    fs::write(&toml_file, rendered)
        .map_err(|e| Error::Config(format!("Failed to write: {e}")))?;

    // When: the file is read back from disk and parsed.
    let raw = fs::read_to_string(&toml_file)
        .map_err(|e| Error::Config(format!("Failed to read: {e}")))?;
    let actual: Config =
        toml::from_str(&raw).map_err(|e| Error::Config(format!("Failed to parse: {e}")))?;

    // Then: the compared fields match the values that were saved.
    let (got, want) = (&actual.defaults, &expected.defaults);
    assert_eq!(got.refresh_hours, want.refresh_hours);
    assert_eq!(got.max_archives, want.max_archives);
    assert_eq!(got.fetch_enabled, want.fetch_enabled);
    assert_eq!(got.allowlist, want.allowlist);
    assert_eq!(actual.paths.root, expected.paths.root);

    Ok(())
}
828
#[test]
fn test_config_load_missing_file() {
    // Given: a path that does not exist on disk.
    let non_existent = PathBuf::from("/definitely/does/not/exist/config.toml");

    // When: running the same read-then-parse sequence used to load a config.
    let result = (|| -> Result<Config> {
        let content = fs::read_to_string(&non_existent)
            .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
        toml::from_str(&content)
            .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
    })();

    // Then: the read step must fail with a `Config` error. A wrong outcome is
    // a test failure, so it is reported with `panic!` like the sibling tests
    // do; `unreachable!` would claim the branch is impossible. The received
    // message is included so a mismatch can be diagnosed from the output.
    match result {
        Err(Error::Config(msg)) => assert!(
            msg.contains("Failed to read config"),
            "unexpected error message: {msg}"
        ),
        _ => panic!("Expected Config error"),
    }
}
849
#[test]
fn test_config_parse_invalid_toml() {
    // Given: a file on disk whose content is not well-formed TOML.
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let broken_file = scratch.path().join("invalid.toml");
    fs::write(&broken_file, "this is not valid toml [[[").expect("Failed to write test file");

    // When: the file is read and handed to the TOML parser.
    let outcome: Result<Config> = fs::read_to_string(&broken_file)
        .map_err(|e| Error::Config(format!("Failed to read config: {e}")))
        .and_then(|content| {
            toml::from_str::<Config>(&content)
                .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
        });

    // Then: the failure comes from the parse step, not the read step.
    assert!(outcome.is_err());
    match outcome {
        Err(Error::Config(msg)) => assert!(msg.contains("Failed to parse config")),
        _ => panic!("Expected Config parse error"),
    }
}
873
#[test]
fn test_config_save_creates_directory() -> Result<()> {
    // Given: a target file two directory levels below a fresh scratch
    // directory; neither intermediate directory exists yet.
    let scratch = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
    let mut nested_path = scratch.path().to_path_buf();
    nested_path.extend(["nested", "deeper", "config.toml"]);
    let config = create_test_config();

    // When: creating the parent directories and writing the serialized
    // config (simulating Config::save logic).
    let target_dir = nested_path
        .parent()
        .ok_or_else(|| Error::Config("Invalid config path".into()))?;
    fs::create_dir_all(target_dir)
        .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;

    let rendered = toml::to_string_pretty(&config)
        .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
    fs::write(&nested_path, rendered)
        .map_err(|e| Error::Config(format!("Failed to write config: {e}")))?;

    // Then: both the file and its containing directory are present.
    assert!(nested_path.exists());
    let containing_dir = nested_path.parent().expect("path should have parent");
    assert!(containing_dir.exists());

    Ok(())
}
908
#[test]
fn test_tool_config_roundtrip() -> Result<()> {
    // Given: a scratch file location and the test tool configuration.
    let scratch = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
    let tool_file = scratch.path().join("tool.toml");
    let expected = create_test_tool_config();

    // When: the configuration goes through `save` followed by `load`.
    expected.save(&tool_file)?;
    let actual = ToolConfig::load(&tool_file)?;

    // Then: metadata, fetch and index settings come back unchanged.
    let (got_meta, want_meta) = (&actual.meta, &expected.meta);
    assert_eq!(got_meta.name, want_meta.name);
    assert_eq!(got_meta.display_name, want_meta.display_name);
    assert_eq!(got_meta.homepage, want_meta.homepage);
    assert_eq!(got_meta.repo, want_meta.repo);

    assert_eq!(actual.fetch.refresh_hours, expected.fetch.refresh_hours);
    assert_eq!(actual.fetch.allowlist, expected.fetch.allowlist);

    assert_eq!(
        actual.index.max_heading_block_lines,
        expected.index.max_heading_block_lines
    );

    Ok(())
}
943
#[test]
fn test_tool_config_load_nonexistent_file() {
    // Given: a location where no tool config file exists.
    let missing = PathBuf::from("/does/not/exist/tool.toml");

    // When: asking `ToolConfig` to load from it.
    let outcome = ToolConfig::load(&missing);

    // Then: loading fails with a `Config` error that names the read step.
    assert!(outcome.is_err());
    match outcome {
        Err(Error::Config(msg)) => assert!(msg.contains("Failed to read tool config")),
        _ => panic!("Expected Config error"),
    }
}
960
#[test]
fn test_config_with_extreme_values() -> Result<()> {
    // Given: unusually large (but not type-maximum) numbers, a 1000-character
    // allowlist entry, and a root path made of 100 slashes.
    let defaults = DefaultsConfig {
        refresh_hours: 1_000_000,
        max_archives: 1_000_000,
        fetch_enabled: false,
        follow_links: FollowLinks::None,
        allowlist: vec!["a".repeat(1000)],
        filter_non_english: false,
    };
    let paths = PathsConfig {
        root: PathBuf::from("/".repeat(100)),
    };
    let extreme_config = Config { defaults, paths };

    // When: the config is rendered to TOML and parsed back.
    let toml_text = toml::to_string_pretty(&extreme_config)
        .map_err(|e| Error::Config(format!("Serialize failed: {e}")))?;
    let reloaded: Config = toml::from_str(&toml_text)
        .map_err(|e| Error::Config(format!("Deserialize failed: {e}")))?;

    // Then: the large values and the long entry are intact.
    let got = &reloaded.defaults;
    assert_eq!(got.refresh_hours, 1_000_000);
    assert_eq!(got.max_archives, 1_000_000);
    assert!(!got.fetch_enabled);
    assert_eq!(got.allowlist.len(), 1);
    assert_eq!(got.allowlist[0].len(), 1000);

    Ok(())
}
993
#[test]
fn test_config_empty_allowlist() -> Result<()> {
    // Given: allowlist-based link following configured with no entries.
    let defaults = DefaultsConfig {
        refresh_hours: 24,
        max_archives: 10,
        fetch_enabled: true,
        follow_links: FollowLinks::Allowlist,
        allowlist: Vec::new(),
        filter_non_english: true,
    };
    let paths = PathsConfig {
        root: PathBuf::from("/tmp"),
    };
    let config = Config { defaults, paths };

    // When: the config is rendered to TOML and parsed back.
    let toml_text = toml::to_string_pretty(&config)?;
    let reloaded: Config = toml::from_str(&toml_text)?;

    // Then: the list is still empty and the link policy is unchanged.
    assert!(reloaded.defaults.allowlist.is_empty());
    assert!(matches!(
        reloaded.defaults.follow_links,
        FollowLinks::Allowlist
    ));

    Ok(())
}
1024
#[test]
fn test_defaults_config_backward_compatibility_filter_non_english() -> Result<()> {
    // Given: a config document that has no `filter_non_english` key under
    // `[defaults]`.
    let toml_without_filter = r#"
            [defaults]
            refresh_hours = 24
            max_archives = 10
            fetch_enabled = true
            follow_links = "first_party"
            allowlist = []

            [paths]
            root = "/tmp/test"
        "#;

    // When: the document is parsed into a `Config`.
    let parsed = toml::from_str::<Config>(toml_without_filter)
        .map_err(|e| Error::Config(format!("Failed to parse: {e}")))?;

    // Then: the absent key comes back as `true`, and the keys that were
    // present keep their values.
    let defaults = &parsed.defaults;
    assert!(defaults.filter_non_english);
    assert_eq!(defaults.refresh_hours, 24);

    Ok(())
}
1050
#[test]
fn test_index_config_backward_compatibility_filter_non_english() -> Result<()> {
    // Given: an `IndexConfig` whose `filter_non_english` override is unset.
    let original = IndexConfig {
        max_heading_block_lines: Some(500),
        filter_non_english: None,
    };

    // When: the value is encoded to JSON and decoded again.
    let json = serde_json::to_string(&original)
        .map_err(|e| Error::Config(format!("Failed to serialize: {e}")))?;
    let decoded = serde_json::from_str::<IndexConfig>(&json)
        .map_err(|e| Error::Config(format!("Failed to deserialize: {e}")))?;

    // Then: the unset override is still `None` and the other field is intact.
    assert_eq!(decoded.filter_non_english, None);
    assert_eq!(decoded.max_heading_block_lines, Some(500));

    Ok(())
}
1071
#[test]
fn test_filter_non_english_serialization() -> Result<()> {
    // Given: a config that explicitly turns `filter_non_english` off.
    let defaults = DefaultsConfig {
        refresh_hours: 24,
        max_archives: 10,
        fetch_enabled: true,
        follow_links: FollowLinks::FirstParty,
        allowlist: Vec::new(),
        filter_non_english: false,
    };
    let paths = PathsConfig {
        root: PathBuf::from("/tmp"),
    };
    let config = Config { defaults, paths };

    // When: the config is rendered to TOML and parsed back.
    let toml_text = toml::to_string_pretty(&config)?;
    let reloaded: Config = toml::from_str(&toml_text)?;

    // Then: the explicit `false` is still `false` after the round trip.
    assert!(!reloaded.defaults.filter_non_english);

    Ok(())
}
1098
1099    // Property-based tests
1100    proptest! {
1101        #[test]
1102        fn test_config_refresh_hours_roundtrip(refresh_hours in 1u32..=365*24) {
1103            let config = Config {
1104                defaults: DefaultsConfig {
1105                    refresh_hours,
1106                    max_archives: 10,
1107                    fetch_enabled: true,
1108                    follow_links: FollowLinks::FirstParty,
1109                    allowlist: vec![],
1110                    filter_non_english: true,
1111                },
1112                paths: PathsConfig {
1113                    root: PathBuf::from("/tmp"),
1114                },
1115            };
1116
1117            let serialized = toml::to_string_pretty(&config).expect("should serialize");
1118            let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1119
1120            prop_assert_eq!(deserialized.defaults.refresh_hours, refresh_hours);
1121        }
1122
1123        #[test]
1124        fn test_config_max_archives_roundtrip(max_archives in 1usize..=1000) {
1125            let config = Config {
1126                defaults: DefaultsConfig {
1127                    refresh_hours: 24,
1128                    max_archives,
1129                    fetch_enabled: true,
1130                    follow_links: FollowLinks::FirstParty,
1131                    allowlist: vec![],
1132                    filter_non_english: true,
1133                },
1134                paths: PathsConfig {
1135                    root: PathBuf::from("/tmp"),
1136                },
1137            };
1138
1139            let serialized = toml::to_string_pretty(&config).expect("should serialize");
1140            let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1141
1142            prop_assert_eq!(deserialized.defaults.max_archives, max_archives);
1143        }
1144
1145        #[test]
1146        fn test_config_allowlist_roundtrip(allowlist in prop::collection::vec(r"[a-z0-9\.-]+", 0..=10)) {
1147            let config = Config {
1148                defaults: DefaultsConfig {
1149                    refresh_hours: 24,
1150                    max_archives: 10,
1151                    fetch_enabled: true,
1152                    follow_links: FollowLinks::Allowlist,
1153                    allowlist: allowlist.clone(),
1154                    filter_non_english: true,
1155                },
1156                paths: PathsConfig {
1157                    root: PathBuf::from("/tmp"),
1158                },
1159            };
1160
1161            let serialized = toml::to_string_pretty(&config).expect("should serialize");
1162            let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1163
1164            prop_assert_eq!(deserialized.defaults.allowlist, allowlist);
1165        }
1166    }
1167
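    /*
    // Security-focused tests
    // NOTE(review): these tests are disabled. Before re-enabling them, note that the
    // `DefaultsConfig` literals below omit the `filter_non_english` field that the active
    // tests above set, and that this disabled block contains one more closing brace than
    // opening braces (the last line before the end of the comment is a stray `}`).
    // TODO confirm why they were disabled before restoring them.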
1170    #[test]
1171    fn test_config_path_traversal_prevention() {
1172            // Given: Config with potentially malicious paths
1173            let malicious_paths = vec![
1174                "../../../etc/passwd",
1175                "..\\..\\..\\windows\\system32",
1176                "/etc/shadow",
1177                "../../.ssh/id_rsa",
1178            ];
1179
1180            for malicious_path in malicious_paths {
1181                // When: Creating config with malicious path
1182                let config = Config {
1183                    defaults: DefaultsConfig {
1184                        refresh_hours: 24,
1185                        max_archives: 10,
1186                        fetch_enabled: true,
1187                        follow_links: FollowLinks::FirstParty,
1188                        allowlist: vec![],
1189                    },
1190                    paths: PathsConfig {
1191                        root: PathBuf::from(malicious_path),
1192                    },
1193                };
1194
1195                // Then: Should still serialize/deserialize (path validation is separate)
1196                let serialized = toml::to_string_pretty(&config).expect("should serialize");
1197                let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1198                assert_eq!(deserialized.paths.root, PathBuf::from(malicious_path));
1199            }
1200        }
1201
1202        #[test]
1203        fn test_config_malicious_toml_injection() {
1204            // Given: Potentially malicious TOML strings that could break parsing
1205            let malicious_strings = vec![
1206                "\n[malicious]\nkey = \"value\"",
1207                "\"quotes\"in\"weird\"places",
1208                "key = \"value\"\n[new_section]",
1209                "unicode = \"\\u0000\\u0001\\u0002\"",
1210            ];
1211
1212            for malicious_string in malicious_strings {
1213                // When: Setting allowlist with potentially malicious content
1214                let config = Config {
1215                    defaults: DefaultsConfig {
1216                        refresh_hours: 24,
1217                        max_archives: 10,
1218                        fetch_enabled: true,
1219                        follow_links: FollowLinks::Allowlist,
1220                        allowlist: vec![malicious_string.to_string()],
1221                    },
1222                    paths: PathsConfig {
1223                        root: PathBuf::from("/tmp"),
1224                    },
1225                };
1226
1227                // Then: Should serialize safely (TOML library handles escaping)
1228                let result = toml::to_string_pretty(&config);
1229                assert!(
1230                    result.is_ok(),
1231                    "Failed to serialize config with: {malicious_string}"
1232                );
1233
1234                if let Ok(serialized) = result {
1235                    let deserialized_result: std::result::Result<Config, _> =
1236                        toml::from_str(&serialized);
1237                    assert!(
1238                        deserialized_result.is_ok(),
1239                        "Failed to deserialize config with: {malicious_string}"
1240                    );
1241                }
1242            }
1243        }
1244
1245        #[test]
1246        fn test_config_unicode_handling() -> Result<()> {
1247            // Given: Configuration with Unicode content
1248            let unicode_config = Config {
1249                defaults: DefaultsConfig {
1250                    refresh_hours: 24,
1251                    max_archives: 10,
1252                    fetch_enabled: true,
1253                    follow_links: FollowLinks::Allowlist,
1254                    allowlist: vec![
1255                        "例え.com".to_string(),    // Japanese
1256                        "مثال.com".to_string(),    // Arabic
1257                        "пример.com".to_string(),  // Cyrillic
1258                        "🚀.test.com".to_string(), // Emoji
1259                    ],
1260                },
1261                paths: PathsConfig {
1262                    root: PathBuf::from("/tmp/测试"), // Chinese characters
1263                },
1264            };
1265
1266            // When: Serializing and deserializing
1267            let serialized = toml::to_string_pretty(&unicode_config)?;
1268            let deserialized: Config = toml::from_str(&serialized)?;
1269
1270            // Then: Unicode should be preserved correctly
1271            assert_eq!(deserialized.defaults.allowlist.len(), 4);
1272            assert!(
1273                deserialized
1274                    .defaults
1275                    .allowlist
1276                    .contains(&"例え.com".to_string())
1277            );
1278            assert!(
1279                deserialized
1280                    .defaults
1281                    .allowlist
1282                    .contains(&"🚀.test.com".to_string())
1283            );
1284            assert_eq!(deserialized.paths.root, PathBuf::from("/tmp/测试"));
1285
1286            Ok(())
1287        }
1288
1289        #[test]
1290        fn test_config_edge_case_empty_values() -> Result<()> {
1291            // Given: Configuration with empty values
1292            let empty_config = Config {
1293                defaults: DefaultsConfig {
1294                    refresh_hours: 0, // Edge case: zero refresh
1295                    max_archives: 0,  // Edge case: no archives
1296                    fetch_enabled: false,
1297                    follow_links: FollowLinks::None,
1298                    allowlist: vec![String::new()], // Empty string in allowlist
1299                },
1300                paths: PathsConfig {
1301                    root: PathBuf::from(""), // Empty path
1302                },
1303            };
1304
1305            // When: Serializing and deserializing
1306            let serialized = toml::to_string_pretty(&empty_config)?;
1307            let deserialized: Config = toml::from_str(&serialized)?;
1308
1309            // Then: Empty/zero values should be handled correctly
1310            assert_eq!(deserialized.defaults.refresh_hours, 0);
1311            assert_eq!(deserialized.defaults.max_archives, 0);
1312            assert_eq!(deserialized.defaults.allowlist.len(), 1);
1313            assert_eq!(deserialized.defaults.allowlist[0], "");
1314            assert_eq!(deserialized.paths.root, PathBuf::from(""));
1315
1316            Ok(())
1317        }
1318    }
1319    */
1320}