blz_core/config.rs
1//! Configuration management for blz cache system.
2//!
3//! This module provides hierarchical configuration with global defaults and per-source overrides.
4//! Configuration is stored in TOML format and supports environment variable overrides.
5//!
6//! ## Configuration Hierarchy
7//!
//! 1. **Global config**: `config.toml` in the resolved config directory (see [`Config`] docs)
//! 2. **Per-source config**: `<source_dir>/settings.toml`
//! 3. **Environment variables**: `BLZ_*` prefix (e.g. `BLZ_REFRESH_HOURS`, `BLZ_ROOT`)
11//!
12//! ## Examples
13//!
14//! ### Loading global configuration:
15//!
16//! ```rust
17//! use blz_core::{Config, Result};
18//!
19//! // Load from default location or create with defaults
20//! let config = Config::load()?;
21//! println!("Cache root: {}", config.paths.root.display());
22//! println!("Refresh interval: {} hours", config.defaults.refresh_hours);
23//! # Ok::<(), blz_core::Error>(())
24//! ```
25//!
26//! ### Working with tool-specific configuration:
27//!
28//! ```rust,no_run
29//! use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
30//! use std::path::Path;
31//!
32//! let tool_config = ToolConfig {
33//! meta: ToolMeta {
34//! name: "react".to_string(),
35//! display_name: Some("React Documentation".to_string()),
36//! homepage: Some("https://react.dev".to_string()),
37//! repo: Some("https://github.com/facebook/react".to_string()),
38//! },
39//! fetch: FetchConfig {
40//! refresh_hours: Some(12), // Override global default
41//! follow_links: None, // Use global default
42//! allowlist: None, // Use global default
43//! },
44//! index: IndexConfig {
45//! max_heading_block_lines: Some(500),
46//! },
47//! };
48//!
49//! // Save to file
50//! tool_config.save(Path::new("react/settings.toml"))?;
51//! # Ok::<(), blz_core::Error>(())
52//! ```
53
54use crate::{Error, Result, profile};
55use serde::{Deserialize, Serialize};
56use std::fs;
57use std::path::{Path, PathBuf};
58
59/// Global configuration for the blz cache system.
60///
61/// Contains default settings that apply to all sources unless overridden by per-source configuration.
62/// Configuration is automatically loaded from the system config directory or created with sensible defaults.
63///
64/// ## File Location
65///
66/// The configuration file is stored at (searched in order):
67/// - XDG: `$XDG_CONFIG_HOME/blz/config.toml` or `~/.config/blz/config.toml`
68/// - Dotfile fallback: `~/.blz/config.toml`
69///
70/// A `config.local.toml` in the same directory overrides keys from `config.toml`.
71///
72/// ## Example Configuration File
73///
74/// ```toml
75/// [defaults]
76/// refresh_hours = 24
77/// max_archives = 10
78/// fetch_enabled = true
79/// follow_links = "first_party"
80/// allowlist = ["docs.rs", "developer.mozilla.org"]
81///
82/// [paths]
83/// root = "/home/user/.outfitter/blz"
84/// ```
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct Config {
87 /// Default settings for all sources
88 pub defaults: DefaultsConfig,
89 /// File system paths configuration
90 pub paths: PathsConfig,
91}
92
93/// Default settings that apply to all sources unless overridden.
94///
95/// These settings control fetching behavior, caching policies, and link following rules.
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct DefaultsConfig {
98 /// How often to refresh cached content (in hours).
99 ///
100 /// Sources are only re-fetched if they haven't been updated within this interval.
101 /// Set to 0 to always fetch on access.
102 pub refresh_hours: u32,
103
104 /// Maximum number of archived versions to keep per source.
105 ///
106 /// When a source is updated, the previous version is archived. This setting
107 /// controls how many historical versions to retain for diff generation.
108 pub max_archives: usize,
109
110 /// Whether fetching from remote sources is enabled.
111 ///
112 /// When disabled, only locally cached content is used. Useful for offline work
113 /// or environments with restricted network access.
114 pub fetch_enabled: bool,
115
116 /// Policy for following links in llms.txt files.
117 ///
118 /// Controls whether and which external links should be followed when processing
119 /// llms.txt files that contain references to other documentation sources.
120 pub follow_links: FollowLinks,
121
122 /// Domains allowed for link following.
123 ///
124 /// Only used when `follow_links` is set to `Allowlist`. Links to domains
125 /// not in this list will be ignored.
126 pub allowlist: Vec<String>,
127}
128
129/// Policy for following external links in llms.txt files.
130///
131/// This controls how the system handles links to other documentation sources
132/// within llms.txt files.
133#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
134#[serde(rename_all = "snake_case")]
135pub enum FollowLinks {
136 /// Never follow external links.
137 ///
138 /// Only process the original llms.txt file, ignoring any links to other sources.
139 None,
140
141 /// Follow links to the same domain and its immediate subdomains.
142 ///
143 /// For example, if processing `docs.example.com/llms.txt`, links to
144 /// `api.example.com/docs` or `example.com/guide` would be followed,
145 /// but `other-site.com/docs` would be ignored.
146 FirstParty,
147
148 /// Only follow links to domains in the allowlist.
149 ///
150 /// Use the `allowlist` field in `DefaultsConfig` to specify which domains
151 /// are permitted. This provides fine-grained control over which external
152 /// sources are trusted.
153 Allowlist,
154}
155
156/// File system paths configuration.
157///
158/// Defines where cached content, indices, and metadata are stored on the local filesystem.
159#[derive(Debug, Clone, Serialize, Deserialize)]
160pub struct PathsConfig {
161 /// Root directory for all cached content.
162 ///
163 /// Each source gets its own subdirectory under this root. The directory
164 /// structure is: `root/<source_alias>/`
165 ///
166 /// Default locations:
167 /// - Linux: `~/.local/share/blz`
168 /// - macOS: `~/Library/Application Support/dev.outfitter.blz`
169 /// - Windows: `%APPDATA%\outfitter\blz`
170 pub root: PathBuf,
171}
172
173impl Config {
174 /// Load configuration from the default location or create with defaults.
175 ///
176 /// This method attempts to load the configuration from the system config directory.
177 /// If the file doesn't exist, it returns a configuration with sensible defaults.
178 /// If the file exists but is malformed, it returns an error.
179 ///
180 /// # Returns
181 ///
182 /// Returns the loaded configuration or a default configuration if no file exists.
183 ///
184 /// # Errors
185 ///
186 /// Returns an error if:
187 /// - The config directory cannot be determined (unsupported platform)
188 /// - The config file exists but cannot be read (permissions, I/O error)
189 /// - The config file exists but contains invalid TOML syntax
190 /// - The config file exists but contains invalid configuration values
191 ///
192 /// # Examples
193 ///
194 /// ```rust
195 /// use blz_core::Config;
196 ///
197 /// // Load existing config or create with defaults
198 /// let config = Config::load()?;
199 ///
200 /// if config.defaults.fetch_enabled {
201 /// println!("Fetching is enabled");
202 /// }
203 /// # Ok::<(), blz_core::Error>(())
204 /// ```
205 pub fn load() -> Result<Self> {
206 // Determine base config path (BLZ_CONFIG/BLZ_CONFIG_DIR, XDG, dotfile), or use defaults
207 let base_path = Self::existing_config_path()?;
208
209 // Load base
210 let mut base_value: toml::Value = if let Some(ref path) = base_path {
211 let content = fs::read_to_string(path)
212 .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
213 toml::from_str(&content)
214 .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))?
215 } else {
216 let default_str = toml::to_string(&Self::default())
217 .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?;
218 toml::from_str(&default_str)
219 .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?
220 };
221
222 // Merge optional local override next to resolved base directory
223 let base_dir = base_path.as_deref().map_or_else(
224 || {
225 Self::canonical_config_path().map_or_else(
226 |_| PathBuf::new(),
227 |p| p.parent().map(Path::to_path_buf).unwrap_or_default(),
228 )
229 },
230 |bp| bp.parent().map(Path::to_path_buf).unwrap_or_default(),
231 );
232
233 let local_path = base_dir.join("config.local.toml");
234 if local_path.exists() {
235 let content = fs::read_to_string(&local_path)
236 .map_err(|e| Error::Config(format!("Failed to read local config: {e}")))?;
237 let local_value: toml::Value = toml::from_str(&content)
238 .map_err(|e| Error::Config(format!("Failed to parse local config: {e}")))?;
239 Self::merge_toml(&mut base_value, &local_value);
240 }
241
242 // Deserialize
243 let mut config: Self = base_value
244 .try_into()
245 .map_err(|e| Error::Config(format!("Failed to materialize config: {e}")))?;
246
247 // Apply env overrides
248 config.apply_env_overrides();
249
250 Ok(config)
251 }
252
253 /// Save the configuration to the default location.
254 ///
255 /// This method serializes the configuration to TOML format and writes it to
256 /// the system config directory. Parent directories are created if they don't exist.
257 ///
258 /// # Errors
259 ///
260 /// Returns an error if:
261 /// - The config directory cannot be determined (unsupported platform)
262 /// - Parent directories cannot be created (permissions, disk space)
263 /// - The configuration cannot be serialized to TOML
264 /// - The file cannot be written (permissions, disk space, I/O error)
265 ///
266 /// # Examples
267 ///
268 /// ```rust,no_run
269 /// use blz_core::{Config, DefaultsConfig, PathsConfig, FollowLinks};
270 /// use std::path::PathBuf;
271 ///
272 /// let mut config = Config::load()?;
273 /// config.defaults.refresh_hours = 12; // Update refresh interval
274 /// config.save()?; // Persist changes
275 /// # Ok::<(), blz_core::Error>(())
276 /// ```
277 pub fn save(&self) -> Result<()> {
278 let config_path = Self::save_target_path()?;
279 let parent = config_path
280 .parent()
281 .ok_or_else(|| Error::Config("Invalid config path".into()))?;
282
283 fs::create_dir_all(parent)
284 .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
285
286 let content = toml::to_string_pretty(self)
287 .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
288
289 let tmp = parent.join("config.toml.tmp");
290 fs::write(&tmp, &content)
291 .map_err(|e| Error::Config(format!("Failed to write temp config: {e}")))?;
292 // Best-effort atomic replace; on Windows, rename() replaces if target does not exist.
293 // SAFETY: config.toml write is replaced in one step to avoid torn files.
294 #[cfg(target_os = "windows")]
295 if config_path.exists() {
296 fs::remove_file(&config_path)
297 .map_err(|e| Error::Config(format!("Failed to remove existing config: {e}")))?;
298 }
299 std::fs::rename(&tmp, &config_path)
300 .map_err(|e| Error::Config(format!("Failed to replace config: {e}")))?;
301
302 Ok(())
303 }
304
305 /// Get the path where the global configuration file is stored.
306 ///
307 /// Uses the system-appropriate config directory based on the platform:
308 /// - Linux: `~/.config/blz/global.toml`
309 /// - macOS: `~/Library/Application Support/dev.outfitter.blz/global.toml`
310 /// - Windows: `%APPDATA%\outfitter\blz\global.toml`
311 ///
312 /// # Errors
313 ///
314 /// Returns an error if the system config directory cannot be determined,
315 /// which may happen on unsupported platforms or in sandboxed environments.
316 fn canonical_config_path() -> Result<PathBuf> {
317 let xdg = std::env::var("XDG_CONFIG_HOME")
318 .ok()
319 .map(PathBuf::from)
320 .or_else(|| directories::BaseDirs::new().map(|b| b.home_dir().join(".config")))
321 .ok_or_else(|| Error::Config("Failed to determine XDG config directory".into()))?;
322 Ok(xdg.join(profile::app_dir_slug()).join("config.toml"))
323 }
324
325 fn dotfile_config_path() -> Result<PathBuf> {
326 let home = directories::BaseDirs::new()
327 .map(|b| b.home_dir().to_path_buf())
328 .ok_or_else(|| Error::Config("Failed to determine home directory".into()))?;
329 Ok(home.join(profile::dot_dir_slug()).join("config.toml"))
330 }
331
332 fn existing_config_path() -> Result<Option<PathBuf>> {
333 // 1) BLZ_CONFIG (file)
334 if let Ok(explicit) = std::env::var("BLZ_CONFIG") {
335 let explicit = explicit.trim();
336 if !explicit.is_empty() {
337 let p = PathBuf::from(explicit);
338 if p.is_file() && p.exists() {
339 return Ok(Some(p));
340 }
341 }
342 }
343
344 // 2) BLZ_CONFIG_DIR (dir)
345 if let Ok(dir) = std::env::var("BLZ_CONFIG_DIR") {
346 let dir = dir.trim();
347 if !dir.is_empty() {
348 let p = PathBuf::from(dir).join("config.toml");
349 if p.is_file() && p.exists() {
350 return Ok(Some(p));
351 }
352 }
353 }
354
355 // 3) XDG
356 let xdg = Self::canonical_config_path()?;
357 if xdg.exists() {
358 return Ok(Some(xdg));
359 }
360 // 4) Dotfile
361 let dot = Self::dotfile_config_path()?;
362 if dot.exists() {
363 return Ok(Some(dot));
364 }
365 Ok(None)
366 }
367
368 fn save_target_path() -> Result<PathBuf> {
369 if let Some(existing) = Self::existing_config_path()? {
370 return Ok(existing);
371 }
372 Self::canonical_config_path()
373 }
374
375 fn merge_toml(dst: &mut toml::Value, src: &toml::Value) {
376 use toml::Value::Table;
377 match (dst, src) {
378 (Table(dst_tbl), Table(src_tbl)) => {
379 for (k, v) in src_tbl {
380 match dst_tbl.get_mut(k) {
381 Some(dst_v) => Self::merge_toml(dst_v, v),
382 None => {
383 dst_tbl.insert(k.clone(), v.clone());
384 },
385 }
386 }
387 },
388 (dst_v, src_v) => *dst_v = src_v.clone(),
389 }
390 }
391
392 fn apply_env_overrides(&mut self) {
393 if let Ok(v) = std::env::var("BLZ_REFRESH_HOURS") {
394 if let Ok(n) = v.parse::<u32>() {
395 self.defaults.refresh_hours = n;
396 }
397 }
398 if let Ok(v) = std::env::var("BLZ_MAX_ARCHIVES") {
399 if let Ok(n) = v.parse::<usize>() {
400 self.defaults.max_archives = n;
401 }
402 }
403 if let Ok(v) = std::env::var("BLZ_FETCH_ENABLED") {
404 let norm = v.to_ascii_lowercase();
405 self.defaults.fetch_enabled = matches!(norm.as_str(), "1" | "true" | "yes" | "on");
406 }
407 if let Ok(v) = std::env::var("BLZ_FOLLOW_LINKS") {
408 match v.to_ascii_lowercase().as_str() {
409 "none" => self.defaults.follow_links = FollowLinks::None,
410 "first_party" | "firstparty" => {
411 self.defaults.follow_links = FollowLinks::FirstParty;
412 },
413 "allowlist" => self.defaults.follow_links = FollowLinks::Allowlist,
414 _ => {},
415 }
416 }
417 if let Ok(v) = std::env::var("BLZ_ALLOWLIST") {
418 let list = v
419 .split(',')
420 .map(|s| s.trim().to_string())
421 .filter(|s| !s.is_empty())
422 .collect::<Vec<_>>();
423 if !list.is_empty() {
424 self.defaults.allowlist = list;
425 }
426 }
427 if let Ok(v) = std::env::var("BLZ_ROOT") {
428 let p = PathBuf::from(v);
429 if !p.as_os_str().is_empty() {
430 self.paths.root = p;
431 }
432 }
433 }
434}
435
436impl Default for Config {
437 fn default() -> Self {
438 Self {
439 defaults: DefaultsConfig {
440 refresh_hours: 24,
441 max_archives: 10,
442 fetch_enabled: true,
443 follow_links: FollowLinks::FirstParty,
444 allowlist: Vec::new(),
445 },
446 paths: PathsConfig {
447 root: directories::ProjectDirs::from("dev", "outfitter", profile::app_dir_slug())
448 .map_or_else(
449 || {
450 // Expand home directory properly
451 directories::BaseDirs::new().map_or_else(
452 || PathBuf::from(".outfitter").join(profile::app_dir_slug()),
453 |base| {
454 base.home_dir()
455 .join(".outfitter")
456 .join(profile::app_dir_slug())
457 },
458 )
459 },
460 |dirs| dirs.data_dir().to_path_buf(),
461 ),
462 },
463 }
464 }
465}
466
467/// Per-source configuration that overrides global defaults.
468///
469/// Each documentation source can have its own configuration file (`settings.toml`)
470/// that overrides the global configuration for that specific source. This allows
471/// fine-grained control over fetching behavior, indexing parameters, and metadata.
472///
473/// ## File Location
474///
475/// Stored as `<cache_root>/<source_alias>/settings.toml`
476///
477/// ## Example Configuration File
478///
479/// ```toml
480/// [meta]
481/// name = "react"
482/// display_name = "React Documentation"
483/// homepage = "https://react.dev"
484/// repo = "https://github.com/facebook/react"
485///
486/// [fetch]
487/// refresh_hours = 12 # Override global default
488/// follow_links = "first_party"
489/// allowlist = ["reactjs.org", "react.dev"]
490///
491/// [index]
492/// max_heading_block_lines = 500
493/// ```
494#[derive(Debug, Clone, Serialize, Deserialize)]
495pub struct ToolConfig {
496 /// Metadata about the documentation source
497 pub meta: ToolMeta,
498 /// Fetching behavior overrides
499 pub fetch: FetchConfig,
500 /// Indexing parameter overrides
501 pub index: IndexConfig,
502}
503
504/// Metadata about a documentation source.
505///
506/// This information is used for display purposes and to provide context
507/// about the source of documentation being cached.
508#[derive(Debug, Clone, Serialize, Deserialize)]
509pub struct ToolMeta {
510 /// Unique identifier for this source (used as directory name).
511 ///
512 /// Should be a valid filename that uniquely identifies the source.
513 /// Typically lowercase with hyphens (e.g., "react", "node-js", "rust-std").
514 pub name: String,
515
516 /// Human-readable display name for the source.
517 ///
518 /// Used in search results and UI displays. If not provided, the `name`
519 /// field is used as fallback.
520 pub display_name: Option<String>,
521
522 /// Homepage URL for the documentation source.
523 ///
524 /// The main website or documentation portal for this source.
525 /// Used for reference and linking back to the original documentation.
526 pub homepage: Option<String>,
527
528 /// Repository URL for the documentation source.
529 ///
530 /// Link to the source code repository, if available. Useful for
531 /// understanding the project context and accessing source code.
532 pub repo: Option<String>,
533}
534
535/// Per-source fetching behavior overrides.
536///
537/// These settings override the global defaults for fetching behavior.
538/// Any `None` values will use the corresponding global default setting.
539#[derive(Debug, Clone, Serialize, Deserialize)]
540pub struct FetchConfig {
541 /// Override for refresh interval in hours.
542 ///
543 /// If `Some`, overrides the global `refresh_hours` setting for this source.
544 /// If `None`, uses the global default.
545 pub refresh_hours: Option<u32>,
546
547 /// Override for link following policy.
548 ///
549 /// If `Some`, overrides the global `follow_links` setting for this source.
550 /// If `None`, uses the global default.
551 pub follow_links: Option<FollowLinks>,
552
553 /// Override for allowed domains list.
554 ///
555 /// If `Some`, overrides the global `allowlist` setting for this source.
556 /// If `None`, uses the global default. Only used when `follow_links` is `Allowlist`.
557 pub allowlist: Option<Vec<String>>,
558}
559
560/// Per-source indexing parameter overrides.
561///
562/// These settings control how the documentation is processed and indexed
563/// for this specific source, overriding global defaults where specified.
564#[derive(Debug, Clone, Serialize, Deserialize)]
565pub struct IndexConfig {
566 /// Maximum lines to include in a single heading block.
567 ///
568 /// Controls how large sections are broken up during indexing. Larger values
569 /// include more context but may reduce search precision. Smaller values
570 /// provide more focused results but may split related content.
571 ///
572 /// If `None`, uses a sensible default based on content analysis.
573 pub max_heading_block_lines: Option<usize>,
574}
575
576impl ToolConfig {
577 /// Load per-source configuration from a file.
578 ///
579 /// Loads and parses a TOML configuration file for a specific documentation source.
580 /// The file should contain sections for `[meta]`, `[fetch]`, and `[index]`.
581 ///
582 /// # Arguments
583 ///
584 /// * `path` - Path to the configuration file (typically `settings.toml`)
585 ///
586 /// # Returns
587 ///
588 /// Returns the parsed configuration.
589 ///
590 /// # Errors
591 ///
592 /// Returns an error if:
593 /// - The file cannot be read (doesn't exist, permissions, I/O error)
594 /// - The file contains invalid TOML syntax
595 /// - The file contains invalid configuration values
596 /// - Required fields are missing (e.g., `meta.name`)
597 ///
598 /// # Examples
599 ///
600 /// ```rust,no_run
601 /// use blz_core::ToolConfig;
602 /// use std::path::Path;
603 ///
604 /// // Load source-specific configuration
605 /// let config_path = Path::new("sources/react/settings.toml");
606 /// let tool_config = ToolConfig::load(config_path)?;
607 ///
608 /// println!("Source: {}", tool_config.meta.name);
609 /// if let Some(refresh) = tool_config.fetch.refresh_hours {
610 /// println!("Custom refresh interval: {} hours", refresh);
611 /// }
612 /// # Ok::<(), blz_core::Error>(())
613 /// ```
614 pub fn load(path: &Path) -> Result<Self> {
615 let content = fs::read_to_string(path)
616 .map_err(|e| Error::Config(format!("Failed to read tool config: {e}")))?;
617 toml::from_str(&content)
618 .map_err(|e| Error::Config(format!("Failed to parse tool config: {e}")))
619 }
620
621 /// Save per-source configuration to a file.
622 ///
623 /// Serializes the configuration to TOML format and writes it to the specified path.
624 /// The parent directory must already exist.
625 ///
626 /// # Arguments
627 ///
628 /// * `path` - Path where to save the configuration file
629 ///
630 /// # Errors
631 ///
632 /// Returns an error if:
633 /// - The configuration cannot be serialized to TOML
634 /// - The parent directory doesn't exist
635 /// - The file cannot be written (permissions, disk space, I/O error)
636 ///
637 /// # Examples
638 ///
639 /// ```rust,no_run
640 /// use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
641 /// use std::path::Path;
642 ///
643 /// let config = ToolConfig {
644 /// meta: ToolMeta {
645 /// name: "my-docs".to_string(),
646 /// display_name: Some("My Documentation".to_string()),
647 /// homepage: None,
648 /// repo: None,
649 /// },
650 /// fetch: FetchConfig {
651 /// refresh_hours: Some(6),
652 /// follow_links: None,
653 /// allowlist: None,
654 /// },
655 /// index: IndexConfig {
656 /// max_heading_block_lines: Some(300),
657 /// },
658 /// };
659 ///
660 /// let config_path = Path::new("my-docs/settings.toml");
661 /// config.save(config_path)?;
662 /// # Ok::<(), blz_core::Error>(())
663 /// ```
664 pub fn save(&self, path: &Path) -> Result<()> {
665 let content = toml::to_string_pretty(self)
666 .map_err(|e| Error::Config(format!("Failed to serialize tool config: {e}")))?;
667 fs::write(path, content)
668 .map_err(|e| Error::Config(format!("Failed to write tool config: {e}")))?;
669 Ok(())
670 }
671}
672
673#[cfg(test)]
674#[allow(
675 clippy::panic,
676 clippy::disallowed_macros,
677 clippy::unwrap_used,
678 clippy::unnecessary_wraps
679)]
680mod tests {
681 use super::*;
682 use proptest::prelude::*;
683 use std::fs;
684 use tempfile::TempDir;
685
686 // Test fixtures
687 fn create_test_config() -> Config {
688 Config {
689 defaults: DefaultsConfig {
690 refresh_hours: 12,
691 max_archives: 5,
692 fetch_enabled: true,
693 follow_links: FollowLinks::Allowlist,
694 allowlist: vec!["example.com".to_string(), "docs.rs".to_string()],
695 },
696 paths: PathsConfig {
697 root: PathBuf::from("/tmp/test"),
698 },
699 }
700 }
701
702 fn create_test_tool_config() -> ToolConfig {
703 ToolConfig {
704 meta: ToolMeta {
705 name: "test-tool".to_string(),
706 display_name: Some("Test Tool".to_string()),
707 homepage: Some("https://test.com".to_string()),
708 repo: Some("https://github.com/test/tool".to_string()),
709 },
710 fetch: FetchConfig {
711 refresh_hours: Some(6),
712 follow_links: Some(FollowLinks::FirstParty),
713 allowlist: Some(vec!["allowed.com".to_string()]),
714 },
715 index: IndexConfig {
716 max_heading_block_lines: Some(100),
717 },
718 }
719 }
720
721 #[test]
722 fn test_default_config_values() {
723 // Given: Default configuration is requested
724 let config = Config::default();
725
726 // When: Examining default values
727 // Then: Should have sensible defaults
728 assert_eq!(config.defaults.refresh_hours, 24);
729 assert_eq!(config.defaults.max_archives, 10);
730 assert!(config.defaults.fetch_enabled);
731 assert!(matches!(
732 config.defaults.follow_links,
733 FollowLinks::FirstParty
734 ));
735 assert!(config.defaults.allowlist.is_empty());
736 assert!(!config.paths.root.as_os_str().is_empty());
737 }
738
739 #[test]
740 fn test_follow_links_serialization() -> Result<()> {
741 // Given: Different FollowLinks variants
742 let variants = vec![
743 FollowLinks::None,
744 FollowLinks::FirstParty,
745 FollowLinks::Allowlist,
746 ];
747
748 for variant in variants {
749 // When: Serializing and deserializing
750 let serialized = serde_json::to_string(&variant)?;
751 let deserialized: FollowLinks = serde_json::from_str(&serialized)?;
752
753 // Then: Should round-trip correctly
754 assert_eq!(variant, deserialized, "Round-trip failed for {variant:?}");
755 }
756 Ok(())
757 }
758
759 #[test]
760 fn test_config_save_and_load_roundtrip() -> Result<()> {
761 // Given: A temporary directory and test configuration
762 let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
763 let config_path = temp_dir.path().join("test_config.toml");
764 let original_config = create_test_config();
765
766 // When: Saving and then loading the configuration
767 let content = toml::to_string_pretty(&original_config)
768 .map_err(|e| Error::Config(format!("Failed to serialize: {e}")))?;
769 fs::write(&config_path, content)
770 .map_err(|e| Error::Config(format!("Failed to write: {e}")))?;
771
772 let loaded_config: Config = {
773 let content = fs::read_to_string(&config_path)
774 .map_err(|e| Error::Config(format!("Failed to read: {e}")))?;
775 toml::from_str(&content).map_err(|e| Error::Config(format!("Failed to parse: {e}")))?
776 };
777
778 // Then: Configurations should be identical
779 assert_eq!(
780 loaded_config.defaults.refresh_hours,
781 original_config.defaults.refresh_hours
782 );
783 assert_eq!(
784 loaded_config.defaults.max_archives,
785 original_config.defaults.max_archives
786 );
787 assert_eq!(
788 loaded_config.defaults.fetch_enabled,
789 original_config.defaults.fetch_enabled
790 );
791 assert_eq!(
792 loaded_config.defaults.allowlist,
793 original_config.defaults.allowlist
794 );
795 assert_eq!(loaded_config.paths.root, original_config.paths.root);
796
797 Ok(())
798 }
799
800 #[test]
801 fn test_config_load_missing_file() {
802 // Given: A non-existent config file path
803 let non_existent = PathBuf::from("/definitely/does/not/exist/config.toml");
804
805 // When: Attempting to load config
806 let result = (|| -> Result<Config> {
807 let content = fs::read_to_string(&non_existent)
808 .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
809 toml::from_str(&content)
810 .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
811 })();
812
813 // Then: Should return appropriate error
814 assert!(result.is_err());
815 match result {
816 Err(Error::Config(msg)) => assert!(msg.contains("Failed to read config")),
817 _ => unreachable!("Expected Config error"),
818 }
819 }
820
821 #[test]
822 fn test_config_parse_invalid_toml() {
823 // Given: Invalid TOML content
824 let temp_dir = TempDir::new().expect("Failed to create temp dir");
825 let config_path = temp_dir.path().join("invalid.toml");
826 fs::write(&config_path, "this is not valid toml [[[").expect("Failed to write test file");
827
828 // When: Attempting to parse
829 let result = (|| -> Result<Config> {
830 let content = fs::read_to_string(&config_path)
831 .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
832 toml::from_str(&content)
833 .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
834 })();
835
836 // Then: Should return parse error
837 assert!(result.is_err());
838 if let Err(Error::Config(msg)) = result {
839 assert!(msg.contains("Failed to parse config"));
840 } else {
841 panic!("Expected Config parse error");
842 }
843 }
844
845 #[test]
846 fn test_config_save_creates_directory() -> Result<()> {
847 // Given: A temporary directory and nested config path
848 let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
849 let nested_path = temp_dir
850 .path()
851 .join("nested")
852 .join("deeper")
853 .join("config.toml");
854 let config = create_test_config();
855
856 // When: Saving config to nested path (simulating Config::save logic)
857 let parent = nested_path
858 .parent()
859 .ok_or_else(|| Error::Config("Invalid config path".into()))?;
860 fs::create_dir_all(parent)
861 .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
862
863 let content = toml::to_string_pretty(&config)
864 .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
865 fs::write(&nested_path, content)
866 .map_err(|e| Error::Config(format!("Failed to write config: {e}")))?;
867
868 // Then: Directory should be created and file should exist
869 assert!(nested_path.exists());
870 assert!(
871 nested_path
872 .parent()
873 .expect("path should have parent")
874 .exists()
875 );
876
877 Ok(())
878 }
879
880 #[test]
881 fn test_tool_config_roundtrip() -> Result<()> {
882 // Given: A temporary file and test tool configuration
883 let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
884 let config_path = temp_dir.path().join("tool.toml");
885 let original_config = create_test_tool_config();
886
887 // When: Saving and loading the tool configuration
888 original_config.save(&config_path)?;
889 let loaded_config = ToolConfig::load(&config_path)?;
890
891 // Then: Configurations should be identical
892 assert_eq!(loaded_config.meta.name, original_config.meta.name);
893 assert_eq!(
894 loaded_config.meta.display_name,
895 original_config.meta.display_name
896 );
897 assert_eq!(loaded_config.meta.homepage, original_config.meta.homepage);
898 assert_eq!(loaded_config.meta.repo, original_config.meta.repo);
899 assert_eq!(
900 loaded_config.fetch.refresh_hours,
901 original_config.fetch.refresh_hours
902 );
903 assert_eq!(
904 loaded_config.fetch.allowlist,
905 original_config.fetch.allowlist
906 );
907 assert_eq!(
908 loaded_config.index.max_heading_block_lines,
909 original_config.index.max_heading_block_lines
910 );
911
912 Ok(())
913 }
914
915 #[test]
916 fn test_tool_config_load_nonexistent_file() {
917 // Given: A non-existent file path
918 let non_existent = PathBuf::from("/does/not/exist/tool.toml");
919
920 // When: Attempting to load
921 let result = ToolConfig::load(&non_existent);
922
923 // Then: Should return appropriate error
924 assert!(result.is_err());
925 if let Err(Error::Config(msg)) = result {
926 assert!(msg.contains("Failed to read tool config"));
927 } else {
928 panic!("Expected Config error");
929 }
930 }
931
/// Round-trips a `Config` holding unusually large or long values through TOML
/// and checks that every field set below comes back unchanged.
///
/// Returns an `Error::Config` if serialization or deserialization fails, so the
/// failing step is named in the test output.
#[test]
fn test_config_with_extreme_values() -> Result<()> {
    // Given: Configuration with extreme but valid values (avoiding serialization limits)
    let long_domain = "a".repeat(1000); // Very long domain
    let long_root = PathBuf::from("/".repeat(100)); // Very long path
    let extreme_config = Config {
        defaults: DefaultsConfig {
            refresh_hours: 1_000_000, // Large but not MAX to avoid TOML issues
            max_archives: 1_000_000,  // Large but not MAX to avoid TOML issues
            fetch_enabled: false,
            follow_links: FollowLinks::None,
            allowlist: vec![long_domain.clone()],
        },
        paths: PathsConfig {
            root: long_root.clone(),
        },
    };

    // When: Serializing and deserializing
    let serialized = toml::to_string_pretty(&extreme_config)
        .map_err(|e| Error::Config(format!("Serialize failed: {e}")))?;
    let deserialized: Config = toml::from_str(&serialized)
        .map_err(|e| Error::Config(format!("Deserialize failed: {e}")))?;

    // Then: Should handle extreme values correctly
    assert_eq!(deserialized.defaults.refresh_hours, 1_000_000);
    assert_eq!(deserialized.defaults.max_archives, 1_000_000);
    assert!(!deserialized.defaults.fetch_enabled);
    assert!(matches!(
        deserialized.defaults.follow_links,
        FollowLinks::None
    ));
    assert_eq!(deserialized.defaults.allowlist.len(), 1);
    assert_eq!(deserialized.defaults.allowlist[0].len(), 1000);
    // Length alone would also accept a corrupted entry; compare the content too.
    assert_eq!(deserialized.defaults.allowlist[0], long_domain);
    // `PathBuf` equality is component-wise and would treat a run of slashes as
    // equal to a single "/", so compare the raw OS strings instead.
    assert_eq!(deserialized.paths.root.as_os_str(), long_root.as_os_str());

    Ok(())
}
963
#[test]
fn test_config_empty_allowlist() -> Result<()> {
    // Given: allowlist-based link following with no domains listed
    let defaults = DefaultsConfig {
        refresh_hours: 24,
        max_archives: 10,
        fetch_enabled: true,
        follow_links: FollowLinks::Allowlist,
        allowlist: Vec::new(), // Empty allowlist
    };
    let paths = PathsConfig {
        root: PathBuf::from("/tmp"),
    };
    let config = Config { defaults, paths };

    // When: the configuration is written to TOML text and parsed back
    let toml_text = toml::to_string_pretty(&config)?;
    let deserialized: Config = toml::from_str(&toml_text)?;

    // Then: the allowlist is still empty and the link mode is unchanged
    assert!(deserialized.defaults.allowlist.is_empty());
    assert!(matches!(
        deserialized.defaults.follow_links,
        FollowLinks::Allowlist
    ));

    Ok(())
}
993
994 // Property-based tests
995 proptest! {
996 #[test]
997 fn test_config_refresh_hours_roundtrip(refresh_hours in 1u32..=365*24) {
998 let config = Config {
999 defaults: DefaultsConfig {
1000 refresh_hours,
1001 max_archives: 10,
1002 fetch_enabled: true,
1003 follow_links: FollowLinks::FirstParty,
1004 allowlist: vec![],
1005 },
1006 paths: PathsConfig {
1007 root: PathBuf::from("/tmp"),
1008 },
1009 };
1010
1011 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1012 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1013
1014 prop_assert_eq!(deserialized.defaults.refresh_hours, refresh_hours);
1015 }
1016
1017 #[test]
1018 fn test_config_max_archives_roundtrip(max_archives in 1usize..=1000) {
1019 let config = Config {
1020 defaults: DefaultsConfig {
1021 refresh_hours: 24,
1022 max_archives,
1023 fetch_enabled: true,
1024 follow_links: FollowLinks::FirstParty,
1025 allowlist: vec![],
1026 },
1027 paths: PathsConfig {
1028 root: PathBuf::from("/tmp"),
1029 },
1030 };
1031
1032 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1033 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1034
1035 prop_assert_eq!(deserialized.defaults.max_archives, max_archives);
1036 }
1037
1038 #[test]
1039 fn test_config_allowlist_roundtrip(allowlist in prop::collection::vec(r"[a-z0-9\.-]+", 0..=10)) {
1040 let config = Config {
1041 defaults: DefaultsConfig {
1042 refresh_hours: 24,
1043 max_archives: 10,
1044 fetch_enabled: true,
1045 follow_links: FollowLinks::Allowlist,
1046 allowlist: allowlist.clone(),
1047 },
1048 paths: PathsConfig {
1049 root: PathBuf::from("/tmp"),
1050 },
1051 };
1052
1053 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1054 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1055
1056 prop_assert_eq!(deserialized.defaults.allowlist, allowlist);
1057 }
1058 }
1059
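/*
// NOTE(review): everything from here down to the closing comment marker is disabled,
// so none of these tests are compiled or run. The reason is not recorded here; confirm
// before re-enabling. The last line inside this comment is a stray closing brace that
// must be dropped if the tests are restored.
// Security-focused tests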
1062 #[test]
1063 fn test_config_path_traversal_prevention() {
1064 // Given: Config with potentially malicious paths
1065 let malicious_paths = vec![
1066 "../../../etc/passwd",
1067 "..\\..\\..\\windows\\system32",
1068 "/etc/shadow",
1069 "../../.ssh/id_rsa",
1070 ];
1071
1072 for malicious_path in malicious_paths {
1073 // When: Creating config with malicious path
1074 let config = Config {
1075 defaults: DefaultsConfig {
1076 refresh_hours: 24,
1077 max_archives: 10,
1078 fetch_enabled: true,
1079 follow_links: FollowLinks::FirstParty,
1080 allowlist: vec![],
1081 },
1082 paths: PathsConfig {
1083 root: PathBuf::from(malicious_path),
1084 },
1085 };
1086
1087 // Then: Should still serialize/deserialize (path validation is separate)
1088 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1089 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1090 assert_eq!(deserialized.paths.root, PathBuf::from(malicious_path));
1091 }
1092 }
1093
1094 #[test]
1095 fn test_config_malicious_toml_injection() {
1096 // Given: Potentially malicious TOML strings that could break parsing
1097 let malicious_strings = vec![
1098 "\n[malicious]\nkey = \"value\"",
1099 "\"quotes\"in\"weird\"places",
1100 "key = \"value\"\n[new_section]",
1101 "unicode = \"\\u0000\\u0001\\u0002\"",
1102 ];
1103
1104 for malicious_string in malicious_strings {
1105 // When: Setting allowlist with potentially malicious content
1106 let config = Config {
1107 defaults: DefaultsConfig {
1108 refresh_hours: 24,
1109 max_archives: 10,
1110 fetch_enabled: true,
1111 follow_links: FollowLinks::Allowlist,
1112 allowlist: vec![malicious_string.to_string()],
1113 },
1114 paths: PathsConfig {
1115 root: PathBuf::from("/tmp"),
1116 },
1117 };
1118
1119 // Then: Should serialize safely (TOML library handles escaping)
1120 let result = toml::to_string_pretty(&config);
1121 assert!(
1122 result.is_ok(),
1123 "Failed to serialize config with: {malicious_string}"
1124 );
1125
1126 if let Ok(serialized) = result {
1127 let deserialized_result: std::result::Result<Config, _> =
1128 toml::from_str(&serialized);
1129 assert!(
1130 deserialized_result.is_ok(),
1131 "Failed to deserialize config with: {malicious_string}"
1132 );
1133 }
1134 }
1135 }
1136
1137 #[test]
1138 fn test_config_unicode_handling() -> Result<()> {
1139 // Given: Configuration with Unicode content
1140 let unicode_config = Config {
1141 defaults: DefaultsConfig {
1142 refresh_hours: 24,
1143 max_archives: 10,
1144 fetch_enabled: true,
1145 follow_links: FollowLinks::Allowlist,
1146 allowlist: vec![
1147 "例え.com".to_string(), // Japanese
1148 "مثال.com".to_string(), // Arabic
1149 "пример.com".to_string(), // Cyrillic
1150 "🚀.test.com".to_string(), // Emoji
1151 ],
1152 },
1153 paths: PathsConfig {
1154 root: PathBuf::from("/tmp/测试"), // Chinese characters
1155 },
1156 };
1157
1158 // When: Serializing and deserializing
1159 let serialized = toml::to_string_pretty(&unicode_config)?;
1160 let deserialized: Config = toml::from_str(&serialized)?;
1161
1162 // Then: Unicode should be preserved correctly
1163 assert_eq!(deserialized.defaults.allowlist.len(), 4);
1164 assert!(
1165 deserialized
1166 .defaults
1167 .allowlist
1168 .contains(&"例え.com".to_string())
1169 );
1170 assert!(
1171 deserialized
1172 .defaults
1173 .allowlist
1174 .contains(&"🚀.test.com".to_string())
1175 );
1176 assert_eq!(deserialized.paths.root, PathBuf::from("/tmp/测试"));
1177
1178 Ok(())
1179 }
1180
1181 #[test]
1182 fn test_config_edge_case_empty_values() -> Result<()> {
1183 // Given: Configuration with empty values
1184 let empty_config = Config {
1185 defaults: DefaultsConfig {
1186 refresh_hours: 0, // Edge case: zero refresh
1187 max_archives: 0, // Edge case: no archives
1188 fetch_enabled: false,
1189 follow_links: FollowLinks::None,
1190 allowlist: vec![String::new()], // Empty string in allowlist
1191 },
1192 paths: PathsConfig {
1193 root: PathBuf::from(""), // Empty path
1194 },
1195 };
1196
1197 // When: Serializing and deserializing
1198 let serialized = toml::to_string_pretty(&empty_config)?;
1199 let deserialized: Config = toml::from_str(&serialized)?;
1200
1201 // Then: Empty/zero values should be handled correctly
1202 assert_eq!(deserialized.defaults.refresh_hours, 0);
1203 assert_eq!(deserialized.defaults.max_archives, 0);
1204 assert_eq!(deserialized.defaults.allowlist.len(), 1);
1205 assert_eq!(deserialized.defaults.allowlist[0], "");
1206 assert_eq!(deserialized.paths.root, PathBuf::from(""));
1207
1208 Ok(())
1209 }
1210 }
1211 */
1212}