blz_core/config.rs
1//! Configuration management for blz cache system.
2//!
3//! This module provides hierarchical configuration with global defaults and per-source overrides.
4//! Configuration is stored in TOML format and supports environment variable overrides.
5//!
6//! ## Configuration Hierarchy
7//!
//! 1. **Global config**: Platform-specific config directory (see the [`Config`] docs)
//! 2. **Per-source config**: `<source_dir>/settings.toml`
//! 3. **Environment variables**: `BLZ_*` prefix (for example `BLZ_REFRESH_HOURS`, `BLZ_ROOT`)
11//!
12//! ## Examples
13//!
14//! ### Loading global configuration:
15//!
16//! ```rust
17//! use blz_core::{Config, Result};
18//!
19//! // Load from default location or create with defaults
20//! let config = Config::load()?;
21//! println!("Cache root: {}", config.paths.root.display());
22//! println!("Refresh interval: {} hours", config.defaults.refresh_hours);
23//! # Ok::<(), blz_core::Error>(())
24//! ```
25//!
26//! ### Working with tool-specific configuration:
27//!
28//! ```rust,no_run
29//! use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
30//! use std::path::Path;
31//!
32//! let tool_config = ToolConfig {
33//! meta: ToolMeta {
34//! name: "react".to_string(),
35//! display_name: Some("React Documentation".to_string()),
36//! homepage: Some("https://react.dev".to_string()),
37//! repo: Some("https://github.com/facebook/react".to_string()),
38//! },
39//! fetch: FetchConfig {
40//! refresh_hours: Some(12), // Override global default
41//! follow_links: None, // Use global default
42//! allowlist: None, // Use global default
43//! },
44//! index: IndexConfig {
45//! max_heading_block_lines: Some(500),
46//! filter_non_english: None, // Use global default
47//! },
48//! };
49//!
50//! // Save to file
51//! tool_config.save(Path::new("react/settings.toml"))?;
52//! # Ok::<(), blz_core::Error>(())
53//! ```
54
55use crate::{Error, Result, profile};
56use serde::{Deserialize, Serialize};
57use std::fs;
58use std::path::{Path, PathBuf};
59
/// Serde fallback for the `filter_non_english` key of [`DefaultsConfig`].
///
/// Yields `true`, so a config file that omits the key keeps non-English
/// filtering switched on (the backward-compatible behavior).
const fn default_filter_non_english() -> bool {
    true
}
67
/// Global configuration for the blz cache system.
///
/// Contains default settings that apply to all sources unless overridden by per-source configuration.
/// Configuration is loaded by [`Config::load`] from the first config file found, or built from
/// [`Config::default`] when no file exists.
///
/// ## File Location
///
/// The configuration file is searched for in this order:
/// - `$BLZ_CONFIG` (an explicit file path)
/// - `$BLZ_CONFIG_DIR/config.toml`
/// - XDG: `$XDG_CONFIG_HOME/blz/config.toml` or `~/.config/blz/config.toml`
/// - Dotfile fallback: `~/.blz/config.toml`
///
/// A `config.local.toml` in the same directory overrides keys from `config.toml`.
/// `BLZ_*` environment variables are applied last and override both files.
///
/// ## Example Configuration File
///
/// ```toml
/// [defaults]
/// refresh_hours = 24
/// max_archives = 10
/// fetch_enabled = true
/// follow_links = "first_party"
/// allowlist = ["docs.rs", "developer.mozilla.org"]
///
/// [paths]
/// root = "/home/user/.outfitter/blz"
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Default settings for all sources (the `[defaults]` table).
    pub defaults: DefaultsConfig,
    /// File system paths configuration (the `[paths]` table).
    pub paths: PathsConfig,
}
101
/// Default settings that apply to all sources unless overridden.
///
/// These settings control fetching behavior, caching policies, and link following rules.
/// Every key except `filter_non_english` is required when the `[defaults]` table is
/// deserialized; `filter_non_english` falls back to `true` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DefaultsConfig {
    /// How often to refresh cached content (in hours).
    ///
    /// Sources are only re-fetched if they haven't been updated within this interval.
    /// Set to 0 to always fetch on access.
    ///
    /// Can be overridden with the `BLZ_REFRESH_HOURS` environment variable.
    pub refresh_hours: u32,

    /// Maximum number of archived versions to keep per source.
    ///
    /// When a source is updated, the previous version is archived. This setting
    /// controls how many historical versions to retain for diff generation.
    ///
    /// Can be overridden with the `BLZ_MAX_ARCHIVES` environment variable.
    pub max_archives: usize,

    /// Whether fetching from remote sources is enabled.
    ///
    /// When disabled, only locally cached content is used. Useful for offline work
    /// or environments with restricted network access.
    ///
    /// Can be overridden with the `BLZ_FETCH_ENABLED` environment variable.
    pub fetch_enabled: bool,

    /// Policy for following links in llms.txt files.
    ///
    /// Controls whether and which external links should be followed when processing
    /// llms.txt files that contain references to other documentation sources.
    ///
    /// Can be overridden with the `BLZ_FOLLOW_LINKS` environment variable
    /// (`none`, `first_party`/`firstparty`, or `allowlist`).
    pub follow_links: FollowLinks,

    /// Domains allowed for link following.
    ///
    /// Only used when `follow_links` is set to `Allowlist`. Links to domains
    /// not in this list will be ignored.
    ///
    /// Can be overridden with the `BLZ_ALLOWLIST` environment variable
    /// (a comma-separated list of domains).
    pub allowlist: Vec<String>,

    /// Default language filtering behavior.
    ///
    /// When `true`, non-English content is filtered during document processing.
    /// When `false`, all content is retained regardless of language.
    /// Defaults to `true` for backward compatibility, including when the key is
    /// missing from the config file.
    #[serde(default = "default_filter_non_english")]
    pub filter_non_english: bool,
}
145
/// Policy for following external links in llms.txt files.
///
/// This controls how the system handles links to other documentation sources
/// within llms.txt files.
///
/// Variants are (de)serialized in `snake_case`: `"none"`, `"first_party"`, and
/// `"allowlist"`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FollowLinks {
    /// Never follow external links (serialized as `"none"`).
    ///
    /// Only process the original llms.txt file, ignoring any links to other sources.
    None,

    /// Follow links to the same domain and its immediate subdomains
    /// (serialized as `"first_party"`).
    ///
    /// For example, if processing `docs.example.com/llms.txt`, links to
    /// `api.example.com/docs` or `example.com/guide` would be followed,
    /// but `other-site.com/docs` would be ignored.
    FirstParty,

    /// Only follow links to domains in the allowlist (serialized as `"allowlist"`).
    ///
    /// Use the `allowlist` field in `DefaultsConfig` to specify which domains
    /// are permitted. This provides fine-grained control over which external
    /// sources are trusted.
    Allowlist,
}
172
/// File system paths configuration.
///
/// Defines where cached content, indices, and metadata are stored on the local filesystem.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PathsConfig {
    /// Root directory for all cached content.
    ///
    /// Each source gets its own subdirectory under this root. The directory
    /// structure is: `root/<source_alias>/`
    ///
    /// Default locations (the platform data directory reported by the
    /// `directories` crate for the `dev` / `outfitter` / app-slug project):
    /// - Linux: `~/.local/share/blz`
    /// - macOS: `~/Library/Application Support/dev.outfitter.blz`
    /// - Windows: `%APPDATA%\outfitter\blz\data`
    ///
    /// If no platform data directory can be determined, the default falls back
    /// to `~/.outfitter/<app slug>` (or the relative `.outfitter/<app slug>` when
    /// the home directory is unknown as well).
    ///
    /// Can be overridden with the `BLZ_ROOT` environment variable.
    pub root: PathBuf,
}
189
190impl Config {
191 /// Load configuration from the default location or create with defaults.
192 ///
193 /// This method attempts to load the configuration from the system config directory.
194 /// If the file doesn't exist, it returns a configuration with sensible defaults.
195 /// If the file exists but is malformed, it returns an error.
196 ///
197 /// # Returns
198 ///
199 /// Returns the loaded configuration or a default configuration if no file exists.
200 ///
201 /// # Errors
202 ///
203 /// Returns an error if:
204 /// - The config directory cannot be determined (unsupported platform)
205 /// - The config file exists but cannot be read (permissions, I/O error)
206 /// - The config file exists but contains invalid TOML syntax
207 /// - The config file exists but contains invalid configuration values
208 ///
209 /// # Examples
210 ///
211 /// ```rust
212 /// use blz_core::Config;
213 ///
214 /// // Load existing config or create with defaults
215 /// let config = Config::load()?;
216 ///
217 /// if config.defaults.fetch_enabled {
218 /// println!("Fetching is enabled");
219 /// }
220 /// # Ok::<(), blz_core::Error>(())
221 /// ```
222 pub fn load() -> Result<Self> {
223 // Determine base config path (BLZ_CONFIG/BLZ_CONFIG_DIR, XDG, dotfile), or use defaults
224 let base_path = Self::existing_config_path()?;
225
226 // Load base
227 let mut base_value: toml::Value = if let Some(ref path) = base_path {
228 let content = fs::read_to_string(path)
229 .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
230 toml::from_str(&content)
231 .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))?
232 } else {
233 let default_str = toml::to_string(&Self::default())
234 .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?;
235 toml::from_str(&default_str)
236 .map_err(|e| Error::Config(format!("Failed to init default config: {e}")))?
237 };
238
239 // Merge optional local override next to resolved base directory
240 let base_dir = base_path.as_deref().map_or_else(
241 || {
242 Self::canonical_config_path().map_or_else(
243 |_| PathBuf::new(),
244 |p| p.parent().map(Path::to_path_buf).unwrap_or_default(),
245 )
246 },
247 |bp| bp.parent().map(Path::to_path_buf).unwrap_or_default(),
248 );
249
250 let local_path = base_dir.join("config.local.toml");
251 if local_path.exists() {
252 let content = fs::read_to_string(&local_path)
253 .map_err(|e| Error::Config(format!("Failed to read local config: {e}")))?;
254 let local_value: toml::Value = toml::from_str(&content)
255 .map_err(|e| Error::Config(format!("Failed to parse local config: {e}")))?;
256 Self::merge_toml(&mut base_value, &local_value);
257 }
258
259 // Deserialize
260 let mut config: Self = base_value
261 .try_into()
262 .map_err(|e| Error::Config(format!("Failed to materialize config: {e}")))?;
263
264 // Apply env overrides
265 config.apply_env_overrides();
266
267 Ok(config)
268 }
269
270 /// Save the configuration to the default location.
271 ///
272 /// This method serializes the configuration to TOML format and writes it to
273 /// the system config directory. Parent directories are created if they don't exist.
274 ///
275 /// # Errors
276 ///
277 /// Returns an error if:
278 /// - The config directory cannot be determined (unsupported platform)
279 /// - Parent directories cannot be created (permissions, disk space)
280 /// - The configuration cannot be serialized to TOML
281 /// - The file cannot be written (permissions, disk space, I/O error)
282 ///
283 /// # Examples
284 ///
285 /// ```rust,no_run
286 /// use blz_core::{Config, DefaultsConfig, PathsConfig, FollowLinks};
287 /// use std::path::PathBuf;
288 ///
289 /// let mut config = Config::load()?;
290 /// config.defaults.refresh_hours = 12; // Update refresh interval
291 /// config.save()?; // Persist changes
292 /// # Ok::<(), blz_core::Error>(())
293 /// ```
294 pub fn save(&self) -> Result<()> {
295 let config_path = Self::save_target_path()?;
296 let parent = config_path
297 .parent()
298 .ok_or_else(|| Error::Config("Invalid config path".into()))?;
299
300 fs::create_dir_all(parent)
301 .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
302
303 let content = toml::to_string_pretty(self)
304 .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
305
306 let tmp = parent.join("config.toml.tmp");
307 fs::write(&tmp, &content)
308 .map_err(|e| Error::Config(format!("Failed to write temp config: {e}")))?;
309 // Best-effort atomic replace; on Windows, rename() replaces if target does not exist.
310 // SAFETY: config.toml write is replaced in one step to avoid torn files.
311 #[cfg(target_os = "windows")]
312 if config_path.exists() {
313 fs::remove_file(&config_path)
314 .map_err(|e| Error::Config(format!("Failed to remove existing config: {e}")))?;
315 }
316 std::fs::rename(&tmp, &config_path)
317 .map_err(|e| Error::Config(format!("Failed to replace config: {e}")))?;
318
319 Ok(())
320 }
321
322 /// Get the path where the global configuration file is stored.
323 ///
324 /// Uses the system-appropriate config directory based on the platform:
325 /// - Linux: `~/.config/blz/global.toml`
326 /// - macOS: `~/Library/Application Support/dev.outfitter.blz/global.toml`
327 /// - Windows: `%APPDATA%\outfitter\blz\global.toml`
328 ///
329 /// # Errors
330 ///
331 /// Returns an error if the system config directory cannot be determined,
332 /// which may happen on unsupported platforms or in sandboxed environments.
333 fn canonical_config_path() -> Result<PathBuf> {
334 let xdg = std::env::var("XDG_CONFIG_HOME")
335 .ok()
336 .map(PathBuf::from)
337 .or_else(|| directories::BaseDirs::new().map(|b| b.home_dir().join(".config")))
338 .ok_or_else(|| Error::Config("Failed to determine XDG config directory".into()))?;
339 Ok(xdg.join(profile::app_dir_slug()).join("config.toml"))
340 }
341
342 fn dotfile_config_path() -> Result<PathBuf> {
343 let home = directories::BaseDirs::new()
344 .map(|b| b.home_dir().to_path_buf())
345 .ok_or_else(|| Error::Config("Failed to determine home directory".into()))?;
346 Ok(home.join(profile::dot_dir_slug()).join("config.toml"))
347 }
348
349 fn existing_config_path() -> Result<Option<PathBuf>> {
350 // 1) BLZ_CONFIG (file)
351 if let Ok(explicit) = std::env::var("BLZ_CONFIG") {
352 let explicit = explicit.trim();
353 if !explicit.is_empty() {
354 let p = PathBuf::from(explicit);
355 if p.is_file() && p.exists() {
356 return Ok(Some(p));
357 }
358 }
359 }
360
361 // 2) BLZ_CONFIG_DIR (dir)
362 if let Ok(dir) = std::env::var("BLZ_CONFIG_DIR") {
363 let dir = dir.trim();
364 if !dir.is_empty() {
365 let p = PathBuf::from(dir).join("config.toml");
366 if p.is_file() && p.exists() {
367 return Ok(Some(p));
368 }
369 }
370 }
371
372 // 3) XDG
373 let xdg = Self::canonical_config_path()?;
374 if xdg.exists() {
375 return Ok(Some(xdg));
376 }
377 // 4) Dotfile
378 let dot = Self::dotfile_config_path()?;
379 if dot.exists() {
380 return Ok(Some(dot));
381 }
382 Ok(None)
383 }
384
385 fn save_target_path() -> Result<PathBuf> {
386 if let Some(existing) = Self::existing_config_path()? {
387 return Ok(existing);
388 }
389 Self::canonical_config_path()
390 }
391
392 fn merge_toml(dst: &mut toml::Value, src: &toml::Value) {
393 use toml::Value::Table;
394 match (dst, src) {
395 (Table(dst_tbl), Table(src_tbl)) => {
396 for (k, v) in src_tbl {
397 match dst_tbl.get_mut(k) {
398 Some(dst_v) => Self::merge_toml(dst_v, v),
399 None => {
400 dst_tbl.insert(k.clone(), v.clone());
401 },
402 }
403 }
404 },
405 (dst_v, src_v) => *dst_v = src_v.clone(),
406 }
407 }
408
409 fn apply_env_overrides(&mut self) {
410 if let Ok(v) = std::env::var("BLZ_REFRESH_HOURS") {
411 if let Ok(n) = v.parse::<u32>() {
412 self.defaults.refresh_hours = n;
413 }
414 }
415 if let Ok(v) = std::env::var("BLZ_MAX_ARCHIVES") {
416 if let Ok(n) = v.parse::<usize>() {
417 self.defaults.max_archives = n;
418 }
419 }
420 if let Ok(v) = std::env::var("BLZ_FETCH_ENABLED") {
421 let norm = v.to_ascii_lowercase();
422 self.defaults.fetch_enabled = matches!(norm.as_str(), "1" | "true" | "yes" | "on");
423 }
424 if let Ok(v) = std::env::var("BLZ_FOLLOW_LINKS") {
425 match v.to_ascii_lowercase().as_str() {
426 "none" => self.defaults.follow_links = FollowLinks::None,
427 "first_party" | "firstparty" => {
428 self.defaults.follow_links = FollowLinks::FirstParty;
429 },
430 "allowlist" => self.defaults.follow_links = FollowLinks::Allowlist,
431 _ => {},
432 }
433 }
434 if let Ok(v) = std::env::var("BLZ_ALLOWLIST") {
435 let list = v
436 .split(',')
437 .map(|s| s.trim().to_string())
438 .filter(|s| !s.is_empty())
439 .collect::<Vec<_>>();
440 if !list.is_empty() {
441 self.defaults.allowlist = list;
442 }
443 }
444 if let Ok(v) = std::env::var("BLZ_ROOT") {
445 let p = PathBuf::from(v);
446 if !p.as_os_str().is_empty() {
447 self.paths.root = p;
448 }
449 }
450 }
451}
452
453impl Default for Config {
454 fn default() -> Self {
455 Self {
456 defaults: DefaultsConfig {
457 refresh_hours: 24,
458 max_archives: 10,
459 fetch_enabled: true,
460 follow_links: FollowLinks::FirstParty,
461 allowlist: Vec::new(),
462 filter_non_english: true,
463 },
464 paths: PathsConfig {
465 root: directories::ProjectDirs::from("dev", "outfitter", profile::app_dir_slug())
466 .map_or_else(
467 || {
468 // Expand home directory properly
469 directories::BaseDirs::new().map_or_else(
470 || PathBuf::from(".outfitter").join(profile::app_dir_slug()),
471 |base| {
472 base.home_dir()
473 .join(".outfitter")
474 .join(profile::app_dir_slug())
475 },
476 )
477 },
478 |dirs| dirs.data_dir().to_path_buf(),
479 ),
480 },
481 }
482 }
483}
484
/// Per-source configuration that overrides global defaults.
///
/// Each documentation source can have its own configuration file (`settings.toml`)
/// that overrides the global configuration for that specific source. This allows
/// fine-grained control over fetching behavior, indexing parameters, and metadata.
///
/// ## File Location
///
/// Stored as `<cache_root>/<source_alias>/settings.toml`
///
/// ## Example Configuration File
///
/// ```toml
/// [meta]
/// name = "react"
/// display_name = "React Documentation"
/// homepage = "https://react.dev"
/// repo = "https://github.com/facebook/react"
///
/// [fetch]
/// refresh_hours = 12  # Override global default
/// follow_links = "first_party"
/// allowlist = ["reactjs.org", "react.dev"]
///
/// [index]
/// max_heading_block_lines = 500
/// ```
///
/// NOTE(review): none of the three fields carries `#[serde(default)]`, so the
/// `[meta]`, `[fetch]`, and `[index]` tables presumably all have to be present
/// in the file (they may be empty apart from `meta.name`) — confirm against the
/// intended file format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolConfig {
    /// Metadata about the documentation source (the `[meta]` table).
    pub meta: ToolMeta,
    /// Fetching behavior overrides (the `[fetch]` table).
    pub fetch: FetchConfig,
    /// Indexing parameter overrides (the `[index]` table).
    pub index: IndexConfig,
}
521
/// Metadata about a documentation source.
///
/// This information is used for display purposes and to provide context
/// about the source of documentation being cached. Only `name` is required;
/// the remaining fields are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMeta {
    /// Unique identifier for this source (used as directory name).
    ///
    /// Should be a valid filename that uniquely identifies the source.
    /// Typically lowercase with hyphens (e.g., "react", "node-js", "rust-std").
    pub name: String,

    /// Human-readable display name for the source.
    ///
    /// Used in search results and UI displays. If not provided, the `name`
    /// field is used as fallback.
    pub display_name: Option<String>,

    /// Homepage URL for the documentation source.
    ///
    /// The main website or documentation portal for this source.
    /// Used for reference and linking back to the original documentation.
    /// Stored as a plain string; this type does not validate it as a URL.
    pub homepage: Option<String>,

    /// Repository URL for the documentation source.
    ///
    /// Link to the source code repository, if available. Useful for
    /// understanding the project context and accessing source code.
    /// Stored as a plain string; this type does not validate it as a URL.
    pub repo: Option<String>,
}
552
/// Per-source fetching behavior overrides.
///
/// These settings override the global defaults for fetching behavior.
/// Any `None` values will use the corresponding global default setting
/// (the same-named field of [`DefaultsConfig`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FetchConfig {
    /// Override for refresh interval in hours.
    ///
    /// If `Some`, overrides the global `refresh_hours` setting for this source.
    /// If `None`, uses the global default.
    pub refresh_hours: Option<u32>,

    /// Override for link following policy.
    ///
    /// If `Some`, overrides the global `follow_links` setting for this source.
    /// If `None`, uses the global default.
    pub follow_links: Option<FollowLinks>,

    /// Override for allowed domains list.
    ///
    /// If `Some`, overrides the global `allowlist` setting for this source.
    /// If `None`, uses the global default. Only used when `follow_links` is `Allowlist`.
    pub allowlist: Option<Vec<String>>,
}
577
/// Per-source indexing parameter overrides.
///
/// These settings control how the documentation is processed and indexed
/// for this specific source, overriding global defaults where specified.
/// Both fields are optional; `None` means "no per-source override".
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
    /// Maximum lines to include in a single heading block.
    ///
    /// Controls how large sections are broken up during indexing. Larger values
    /// include more context but may reduce search precision. Smaller values
    /// provide more focused results but may split related content.
    ///
    /// If `None`, uses a sensible default based on content analysis.
    pub max_heading_block_lines: Option<usize>,

    /// Override language filtering for this source.
    ///
    /// If `Some(true)`, non-English content will be filtered regardless of global default.
    /// If `Some(false)`, all content will be retained regardless of global default.
    /// If `None`, uses the global `filter_non_english` setting.
    pub filter_non_english: Option<bool>,
}
600
601impl ToolConfig {
602 /// Load per-source configuration from a file.
603 ///
604 /// Loads and parses a TOML configuration file for a specific documentation source.
605 /// The file should contain sections for `[meta]`, `[fetch]`, and `[index]`.
606 ///
607 /// # Arguments
608 ///
609 /// * `path` - Path to the configuration file (typically `settings.toml`)
610 ///
611 /// # Returns
612 ///
613 /// Returns the parsed configuration.
614 ///
615 /// # Errors
616 ///
617 /// Returns an error if:
618 /// - The file cannot be read (doesn't exist, permissions, I/O error)
619 /// - The file contains invalid TOML syntax
620 /// - The file contains invalid configuration values
621 /// - Required fields are missing (e.g., `meta.name`)
622 ///
623 /// # Examples
624 ///
625 /// ```rust,no_run
626 /// use blz_core::ToolConfig;
627 /// use std::path::Path;
628 ///
629 /// // Load source-specific configuration
630 /// let config_path = Path::new("sources/react/settings.toml");
631 /// let tool_config = ToolConfig::load(config_path)?;
632 ///
633 /// println!("Source: {}", tool_config.meta.name);
634 /// if let Some(refresh) = tool_config.fetch.refresh_hours {
635 /// println!("Custom refresh interval: {} hours", refresh);
636 /// }
637 /// # Ok::<(), blz_core::Error>(())
638 /// ```
639 pub fn load(path: &Path) -> Result<Self> {
640 let content = fs::read_to_string(path)
641 .map_err(|e| Error::Config(format!("Failed to read tool config: {e}")))?;
642 toml::from_str(&content)
643 .map_err(|e| Error::Config(format!("Failed to parse tool config: {e}")))
644 }
645
646 /// Save per-source configuration to a file.
647 ///
648 /// Serializes the configuration to TOML format and writes it to the specified path.
649 /// The parent directory must already exist.
650 ///
651 /// # Arguments
652 ///
653 /// * `path` - Path where to save the configuration file
654 ///
655 /// # Errors
656 ///
657 /// Returns an error if:
658 /// - The configuration cannot be serialized to TOML
659 /// - The parent directory doesn't exist
660 /// - The file cannot be written (permissions, disk space, I/O error)
661 ///
662 /// # Examples
663 ///
664 /// ```rust,no_run
665 /// use blz_core::{ToolConfig, ToolMeta, FetchConfig, IndexConfig};
666 /// use std::path::Path;
667 ///
668 /// let config = ToolConfig {
669 /// meta: ToolMeta {
670 /// name: "my-docs".to_string(),
671 /// display_name: Some("My Documentation".to_string()),
672 /// homepage: None,
673 /// repo: None,
674 /// },
675 /// fetch: FetchConfig {
676 /// refresh_hours: Some(6),
677 /// follow_links: None,
678 /// allowlist: None,
679 /// },
680 /// index: IndexConfig {
681 /// max_heading_block_lines: Some(300),
682 /// filter_non_english: None,
683 /// },
684 /// };
685 ///
686 /// let config_path = Path::new("my-docs/settings.toml");
687 /// config.save(config_path)?;
688 /// # Ok::<(), blz_core::Error>(())
689 /// ```
690 pub fn save(&self, path: &Path) -> Result<()> {
691 let content = toml::to_string_pretty(self)
692 .map_err(|e| Error::Config(format!("Failed to serialize tool config: {e}")))?;
693 fs::write(path, content)
694 .map_err(|e| Error::Config(format!("Failed to write tool config: {e}")))?;
695 Ok(())
696 }
697}
698
699#[cfg(test)]
700#[allow(
701 clippy::panic,
702 clippy::disallowed_macros,
703 clippy::unwrap_used,
704 clippy::unnecessary_wraps
705)]
706mod tests {
707 use super::*;
708 use proptest::prelude::*;
709 use std::fs;
710 use tempfile::TempDir;
711
712 // Test fixtures
713 fn create_test_config() -> Config {
714 Config {
715 defaults: DefaultsConfig {
716 refresh_hours: 12,
717 max_archives: 5,
718 fetch_enabled: true,
719 follow_links: FollowLinks::Allowlist,
720 allowlist: vec!["example.com".to_string(), "docs.rs".to_string()],
721 filter_non_english: true,
722 },
723 paths: PathsConfig {
724 root: PathBuf::from("/tmp/test"),
725 },
726 }
727 }
728
729 fn create_test_tool_config() -> ToolConfig {
730 ToolConfig {
731 meta: ToolMeta {
732 name: "test-tool".to_string(),
733 display_name: Some("Test Tool".to_string()),
734 homepage: Some("https://test.com".to_string()),
735 repo: Some("https://github.com/test/tool".to_string()),
736 },
737 fetch: FetchConfig {
738 refresh_hours: Some(6),
739 follow_links: Some(FollowLinks::FirstParty),
740 allowlist: Some(vec!["allowed.com".to_string()]),
741 },
742 index: IndexConfig {
743 max_heading_block_lines: Some(100),
744 filter_non_english: None,
745 },
746 }
747 }
748
749 #[test]
750 fn test_default_config_values() {
751 // Given: Default configuration is requested
752 let config = Config::default();
753
754 // When: Examining default values
755 // Then: Should have sensible defaults
756 assert_eq!(config.defaults.refresh_hours, 24);
757 assert_eq!(config.defaults.max_archives, 10);
758 assert!(config.defaults.fetch_enabled);
759 assert!(matches!(
760 config.defaults.follow_links,
761 FollowLinks::FirstParty
762 ));
763 assert!(config.defaults.allowlist.is_empty());
764 assert!(config.defaults.filter_non_english);
765 assert!(!config.paths.root.as_os_str().is_empty());
766 }
767
768 #[test]
769 fn test_follow_links_serialization() -> Result<()> {
770 // Given: Different FollowLinks variants
771 let variants = vec![
772 FollowLinks::None,
773 FollowLinks::FirstParty,
774 FollowLinks::Allowlist,
775 ];
776
777 for variant in variants {
778 // When: Serializing and deserializing
779 let serialized = serde_json::to_string(&variant)?;
780 let deserialized: FollowLinks = serde_json::from_str(&serialized)?;
781
782 // Then: Should round-trip correctly
783 assert_eq!(variant, deserialized, "Round-trip failed for {variant:?}");
784 }
785 Ok(())
786 }
787
788 #[test]
789 fn test_config_save_and_load_roundtrip() -> Result<()> {
790 // Given: A temporary directory and test configuration
791 let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
792 let config_path = temp_dir.path().join("test_config.toml");
793 let original_config = create_test_config();
794
795 // When: Saving and then loading the configuration
796 let content = toml::to_string_pretty(&original_config)
797 .map_err(|e| Error::Config(format!("Failed to serialize: {e}")))?;
798 fs::write(&config_path, content)
799 .map_err(|e| Error::Config(format!("Failed to write: {e}")))?;
800
801 let loaded_config: Config = {
802 let content = fs::read_to_string(&config_path)
803 .map_err(|e| Error::Config(format!("Failed to read: {e}")))?;
804 toml::from_str(&content).map_err(|e| Error::Config(format!("Failed to parse: {e}")))?
805 };
806
807 // Then: Configurations should be identical
808 assert_eq!(
809 loaded_config.defaults.refresh_hours,
810 original_config.defaults.refresh_hours
811 );
812 assert_eq!(
813 loaded_config.defaults.max_archives,
814 original_config.defaults.max_archives
815 );
816 assert_eq!(
817 loaded_config.defaults.fetch_enabled,
818 original_config.defaults.fetch_enabled
819 );
820 assert_eq!(
821 loaded_config.defaults.allowlist,
822 original_config.defaults.allowlist
823 );
824 assert_eq!(loaded_config.paths.root, original_config.paths.root);
825
826 Ok(())
827 }
828
829 #[test]
830 fn test_config_load_missing_file() {
831 // Given: A non-existent config file path
832 let non_existent = PathBuf::from("/definitely/does/not/exist/config.toml");
833
834 // When: Attempting to load config
835 let result = (|| -> Result<Config> {
836 let content = fs::read_to_string(&non_existent)
837 .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
838 toml::from_str(&content)
839 .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
840 })();
841
842 // Then: Should return appropriate error
843 assert!(result.is_err());
844 match result {
845 Err(Error::Config(msg)) => assert!(msg.contains("Failed to read config")),
846 _ => unreachable!("Expected Config error"),
847 }
848 }
849
850 #[test]
851 fn test_config_parse_invalid_toml() {
852 // Given: Invalid TOML content
853 let temp_dir = TempDir::new().expect("Failed to create temp dir");
854 let config_path = temp_dir.path().join("invalid.toml");
855 fs::write(&config_path, "this is not valid toml [[[").expect("Failed to write test file");
856
857 // When: Attempting to parse
858 let result = (|| -> Result<Config> {
859 let content = fs::read_to_string(&config_path)
860 .map_err(|e| Error::Config(format!("Failed to read config: {e}")))?;
861 toml::from_str(&content)
862 .map_err(|e| Error::Config(format!("Failed to parse config: {e}")))
863 })();
864
865 // Then: Should return parse error
866 assert!(result.is_err());
867 if let Err(Error::Config(msg)) = result {
868 assert!(msg.contains("Failed to parse config"));
869 } else {
870 panic!("Expected Config parse error");
871 }
872 }
873
874 #[test]
875 fn test_config_save_creates_directory() -> Result<()> {
876 // Given: A temporary directory and nested config path
877 let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
878 let nested_path = temp_dir
879 .path()
880 .join("nested")
881 .join("deeper")
882 .join("config.toml");
883 let config = create_test_config();
884
885 // When: Saving config to nested path (simulating Config::save logic)
886 let parent = nested_path
887 .parent()
888 .ok_or_else(|| Error::Config("Invalid config path".into()))?;
889 fs::create_dir_all(parent)
890 .map_err(|e| Error::Config(format!("Failed to create config directory: {e}")))?;
891
892 let content = toml::to_string_pretty(&config)
893 .map_err(|e| Error::Config(format!("Failed to serialize config: {e}")))?;
894 fs::write(&nested_path, content)
895 .map_err(|e| Error::Config(format!("Failed to write config: {e}")))?;
896
897 // Then: Directory should be created and file should exist
898 assert!(nested_path.exists());
899 assert!(
900 nested_path
901 .parent()
902 .expect("path should have parent")
903 .exists()
904 );
905
906 Ok(())
907 }
908
909 #[test]
910 fn test_tool_config_roundtrip() -> Result<()> {
911 // Given: A temporary file and test tool configuration
912 let temp_dir = TempDir::new().map_err(|e| Error::Config(e.to_string()))?;
913 let config_path = temp_dir.path().join("tool.toml");
914 let original_config = create_test_tool_config();
915
916 // When: Saving and loading the tool configuration
917 original_config.save(&config_path)?;
918 let loaded_config = ToolConfig::load(&config_path)?;
919
920 // Then: Configurations should be identical
921 assert_eq!(loaded_config.meta.name, original_config.meta.name);
922 assert_eq!(
923 loaded_config.meta.display_name,
924 original_config.meta.display_name
925 );
926 assert_eq!(loaded_config.meta.homepage, original_config.meta.homepage);
927 assert_eq!(loaded_config.meta.repo, original_config.meta.repo);
928 assert_eq!(
929 loaded_config.fetch.refresh_hours,
930 original_config.fetch.refresh_hours
931 );
932 assert_eq!(
933 loaded_config.fetch.allowlist,
934 original_config.fetch.allowlist
935 );
936 assert_eq!(
937 loaded_config.index.max_heading_block_lines,
938 original_config.index.max_heading_block_lines
939 );
940
941 Ok(())
942 }
943
/// Loading a tool configuration from a path that does not exist must fail with
/// a `Config` error whose message mentions the failed read.
#[test]
fn test_tool_config_load_nonexistent_file() {
    // Given: a path that points at nothing
    let missing_path = PathBuf::from("/does/not/exist/tool.toml");

    // When: the load is attempted
    let outcome = ToolConfig::load(&missing_path);

    // Then: the failure is surfaced as a configuration error
    assert!(outcome.is_err());
    match outcome {
        Err(Error::Config(msg)) => {
            assert!(msg.contains("Failed to read tool config"));
        },
        _ => panic!("Expected Config error"),
    }
}
960
/// Round-trips a configuration holding unusually large or awkward values
/// through TOML and checks that every field comes back unchanged.
#[test]
fn test_config_with_extreme_values() -> Result<()> {
    // Given: Configuration with extreme but valid values (avoiding serialization limits)
    let extreme_config = Config {
        defaults: DefaultsConfig {
            refresh_hours: 1_000_000, // Large but not MAX to avoid TOML issues
            max_archives: 1_000_000,  // Large but not MAX to avoid TOML issues
            fetch_enabled: false,
            follow_links: FollowLinks::None,
            allowlist: vec!["a".repeat(1000)], // Very long domain
            filter_non_english: false,
        },
        paths: PathsConfig {
            root: PathBuf::from("/".repeat(100)), // 100 consecutive path separators
        },
    };

    // When: Serializing and deserializing
    let serialized = toml::to_string_pretty(&extreme_config)
        .map_err(|e| Error::Config(format!("Serialize failed: {e}")))?;
    let deserialized: Config = toml::from_str(&serialized)
        .map_err(|e| Error::Config(format!("Deserialize failed: {e}")))?;

    // Then: Should handle extreme values correctly
    assert_eq!(deserialized.defaults.refresh_hours, 1_000_000);
    assert_eq!(deserialized.defaults.max_archives, 1_000_000);
    assert!(!deserialized.defaults.fetch_enabled);
    assert!(matches!(
        deserialized.defaults.follow_links,
        FollowLinks::None
    ));
    assert_eq!(deserialized.defaults.allowlist.len(), 1);
    assert_eq!(deserialized.defaults.allowlist[0].len(), 1000);
    assert!(!deserialized.defaults.filter_non_english);
    // The unusual root path is part of the input, so verify it survives too.
    assert_eq!(deserialized.paths.root, extreme_config.paths.root);

    Ok(())
}
993
/// An empty allowlist paired with `FollowLinks::Allowlist` must survive a TOML
/// round-trip without being dropped or altered.
#[test]
fn test_config_empty_allowlist() -> Result<()> {
    // Given: a configuration whose allowlist has no entries
    let defaults = DefaultsConfig {
        refresh_hours: 24,
        max_archives: 10,
        fetch_enabled: true,
        follow_links: FollowLinks::Allowlist,
        allowlist: Vec::new(),
        filter_non_english: true,
    };
    let paths = PathsConfig {
        root: PathBuf::from("/tmp"),
    };
    let config = Config { defaults, paths };

    // When: the configuration goes through TOML and back
    let toml_text = toml::to_string_pretty(&config)?;
    let restored: Config = toml::from_str(&toml_text)?;

    // Then: the allowlist is still empty and the link policy is unchanged
    assert!(restored.defaults.allowlist.is_empty());
    assert!(matches!(
        restored.defaults.follow_links,
        FollowLinks::Allowlist
    ));

    Ok(())
}
1024
/// A global config written before `filter_non_english` existed must still
/// parse, with the missing field falling back to `true`.
#[test]
fn test_defaults_config_backward_compatibility_filter_non_english() -> Result<()> {
    // Given: legacy TOML that has no `filter_non_english` key under [defaults]
    let legacy_toml = r#"
        [defaults]
        refresh_hours = 24
        max_archives = 10
        fetch_enabled = true
        follow_links = "first_party"
        allowlist = []

        [paths]
        root = "/tmp/test"
    "#;

    // When: the legacy document is parsed
    let parsed = toml::from_str::<Config>(legacy_toml)
        .map_err(|e| Error::Config(format!("Failed to parse: {e}")))?;

    // Then: the absent field takes its default (true) and present fields are kept
    assert!(parsed.defaults.filter_non_english);
    assert_eq!(parsed.defaults.refresh_hours, 24);

    Ok(())
}
1050
/// Checks that `IndexConfig` treats `filter_non_english` as optional: an
/// explicit `None` survives a JSON round-trip, and a document that omits the
/// key entirely (as older configs would) also yields `None`.
#[test]
fn test_index_config_backward_compatibility_filter_non_english() -> Result<()> {
    // Given: IndexConfig with filter_non_english left unset
    let config = IndexConfig {
        max_heading_block_lines: Some(500),
        filter_non_english: None,
    };

    // When: Serializing and deserializing
    let serialized = serde_json::to_string(&config)
        .map_err(|e| Error::Config(format!("Failed to serialize: {e}")))?;
    let deserialized: IndexConfig = serde_json::from_str(&serialized)
        .map_err(|e| Error::Config(format!("Failed to deserialize: {e}")))?;

    // Then: None should be preserved (uses global default)
    assert_eq!(deserialized.filter_non_english, None);
    assert_eq!(deserialized.max_heading_block_lines, Some(500));

    // Given: a legacy document in which the key is absent altogether. The
    // round-trip above may emit the key as an explicit null, so it does not by
    // itself prove that a missing key is accepted.
    let legacy_json = r#"{"max_heading_block_lines":500}"#;

    // When: Deserializing the legacy document
    let legacy: IndexConfig = serde_json::from_str(legacy_json)
        .map_err(|e| Error::Config(format!("Failed to deserialize: {e}")))?;

    // Then: The missing field should come back as None
    assert_eq!(legacy.filter_non_english, None);
    assert_eq!(legacy.max_heading_block_lines, Some(500));

    Ok(())
}
1071
/// An explicit `filter_non_english = false` must not be replaced by the
/// default when the configuration is written to TOML and read back.
#[test]
fn test_filter_non_english_serialization() -> Result<()> {
    // Given: a configuration that turns the language filter off explicitly
    let defaults = DefaultsConfig {
        refresh_hours: 24,
        max_archives: 10,
        fetch_enabled: true,
        follow_links: FollowLinks::FirstParty,
        allowlist: Vec::new(),
        filter_non_english: false,
    };
    let paths = PathsConfig {
        root: PathBuf::from("/tmp"),
    };
    let config = Config { defaults, paths };

    // When: the configuration goes through TOML and back
    let toml_text = toml::to_string_pretty(&config)?;
    let restored: Config = toml::from_str(&toml_text)?;

    // Then: the filter is still disabled
    assert!(!restored.defaults.filter_non_english);

    Ok(())
}
1098
1099 // Property-based tests
1100 proptest! {
1101 #[test]
1102 fn test_config_refresh_hours_roundtrip(refresh_hours in 1u32..=365*24) {
1103 let config = Config {
1104 defaults: DefaultsConfig {
1105 refresh_hours,
1106 max_archives: 10,
1107 fetch_enabled: true,
1108 follow_links: FollowLinks::FirstParty,
1109 allowlist: vec![],
1110 filter_non_english: true,
1111 },
1112 paths: PathsConfig {
1113 root: PathBuf::from("/tmp"),
1114 },
1115 };
1116
1117 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1118 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1119
1120 prop_assert_eq!(deserialized.defaults.refresh_hours, refresh_hours);
1121 }
1122
1123 #[test]
1124 fn test_config_max_archives_roundtrip(max_archives in 1usize..=1000) {
1125 let config = Config {
1126 defaults: DefaultsConfig {
1127 refresh_hours: 24,
1128 max_archives,
1129 fetch_enabled: true,
1130 follow_links: FollowLinks::FirstParty,
1131 allowlist: vec![],
1132 filter_non_english: true,
1133 },
1134 paths: PathsConfig {
1135 root: PathBuf::from("/tmp"),
1136 },
1137 };
1138
1139 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1140 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1141
1142 prop_assert_eq!(deserialized.defaults.max_archives, max_archives);
1143 }
1144
1145 #[test]
1146 fn test_config_allowlist_roundtrip(allowlist in prop::collection::vec(r"[a-z0-9\.-]+", 0..=10)) {
1147 let config = Config {
1148 defaults: DefaultsConfig {
1149 refresh_hours: 24,
1150 max_archives: 10,
1151 fetch_enabled: true,
1152 follow_links: FollowLinks::Allowlist,
1153 allowlist: allowlist.clone(),
1154 filter_non_english: true,
1155 },
1156 paths: PathsConfig {
1157 root: PathBuf::from("/tmp"),
1158 },
1159 };
1160
1161 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1162 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1163
1164 prop_assert_eq!(deserialized.defaults.allowlist, allowlist);
1165 }
1166 }
1167
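/*
// Security-focused tests
// NOTE(review): this block is disabled. The `DefaultsConfig` literals below omit
// the `filter_non_english` field that the active tests above set, so they
// presumably need that field added before the block can be re-enabled.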
1170 #[test]
1171 fn test_config_path_traversal_prevention() {
1172 // Given: Config with potentially malicious paths
1173 let malicious_paths = vec![
1174 "../../../etc/passwd",
1175 "..\\..\\..\\windows\\system32",
1176 "/etc/shadow",
1177 "../../.ssh/id_rsa",
1178 ];
1179
1180 for malicious_path in malicious_paths {
1181 // When: Creating config with malicious path
1182 let config = Config {
1183 defaults: DefaultsConfig {
1184 refresh_hours: 24,
1185 max_archives: 10,
1186 fetch_enabled: true,
1187 follow_links: FollowLinks::FirstParty,
1188 allowlist: vec![],
1189 },
1190 paths: PathsConfig {
1191 root: PathBuf::from(malicious_path),
1192 },
1193 };
1194
1195 // Then: Should still serialize/deserialize (path validation is separate)
1196 let serialized = toml::to_string_pretty(&config).expect("should serialize");
1197 let deserialized: Config = toml::from_str(&serialized).expect("should deserialize");
1198 assert_eq!(deserialized.paths.root, PathBuf::from(malicious_path));
1199 }
1200 }
1201
1202 #[test]
1203 fn test_config_malicious_toml_injection() {
1204 // Given: Potentially malicious TOML strings that could break parsing
1205 let malicious_strings = vec![
1206 "\n[malicious]\nkey = \"value\"",
1207 "\"quotes\"in\"weird\"places",
1208 "key = \"value\"\n[new_section]",
1209 "unicode = \"\\u0000\\u0001\\u0002\"",
1210 ];
1211
1212 for malicious_string in malicious_strings {
1213 // When: Setting allowlist with potentially malicious content
1214 let config = Config {
1215 defaults: DefaultsConfig {
1216 refresh_hours: 24,
1217 max_archives: 10,
1218 fetch_enabled: true,
1219 follow_links: FollowLinks::Allowlist,
1220 allowlist: vec![malicious_string.to_string()],
1221 },
1222 paths: PathsConfig {
1223 root: PathBuf::from("/tmp"),
1224 },
1225 };
1226
1227 // Then: Should serialize safely (TOML library handles escaping)
1228 let result = toml::to_string_pretty(&config);
1229 assert!(
1230 result.is_ok(),
1231 "Failed to serialize config with: {malicious_string}"
1232 );
1233
1234 if let Ok(serialized) = result {
1235 let deserialized_result: std::result::Result<Config, _> =
1236 toml::from_str(&serialized);
1237 assert!(
1238 deserialized_result.is_ok(),
1239 "Failed to deserialize config with: {malicious_string}"
1240 );
1241 }
1242 }
1243 }
1244
1245 #[test]
1246 fn test_config_unicode_handling() -> Result<()> {
1247 // Given: Configuration with Unicode content
1248 let unicode_config = Config {
1249 defaults: DefaultsConfig {
1250 refresh_hours: 24,
1251 max_archives: 10,
1252 fetch_enabled: true,
1253 follow_links: FollowLinks::Allowlist,
1254 allowlist: vec![
1255 "例え.com".to_string(), // Japanese
1256 "مثال.com".to_string(), // Arabic
1257 "пример.com".to_string(), // Cyrillic
1258 "🚀.test.com".to_string(), // Emoji
1259 ],
1260 },
1261 paths: PathsConfig {
1262 root: PathBuf::from("/tmp/测试"), // Chinese characters
1263 },
1264 };
1265
1266 // When: Serializing and deserializing
1267 let serialized = toml::to_string_pretty(&unicode_config)?;
1268 let deserialized: Config = toml::from_str(&serialized)?;
1269
1270 // Then: Unicode should be preserved correctly
1271 assert_eq!(deserialized.defaults.allowlist.len(), 4);
1272 assert!(
1273 deserialized
1274 .defaults
1275 .allowlist
1276 .contains(&"例え.com".to_string())
1277 );
1278 assert!(
1279 deserialized
1280 .defaults
1281 .allowlist
1282 .contains(&"🚀.test.com".to_string())
1283 );
1284 assert_eq!(deserialized.paths.root, PathBuf::from("/tmp/测试"));
1285
1286 Ok(())
1287 }
1288
1289 #[test]
1290 fn test_config_edge_case_empty_values() -> Result<()> {
1291 // Given: Configuration with empty values
1292 let empty_config = Config {
1293 defaults: DefaultsConfig {
1294 refresh_hours: 0, // Edge case: zero refresh
1295 max_archives: 0, // Edge case: no archives
1296 fetch_enabled: false,
1297 follow_links: FollowLinks::None,
1298 allowlist: vec![String::new()], // Empty string in allowlist
1299 },
1300 paths: PathsConfig {
1301 root: PathBuf::from(""), // Empty path
1302 },
1303 };
1304
1305 // When: Serializing and deserializing
1306 let serialized = toml::to_string_pretty(&empty_config)?;
1307 let deserialized: Config = toml::from_str(&serialized)?;
1308
1309 // Then: Empty/zero values should be handled correctly
1310 assert_eq!(deserialized.defaults.refresh_hours, 0);
1311 assert_eq!(deserialized.defaults.max_archives, 0);
1312 assert_eq!(deserialized.defaults.allowlist.len(), 1);
1313 assert_eq!(deserialized.defaults.allowlist[0], "");
1314 assert_eq!(deserialized.paths.root, PathBuf::from(""));
1315
1316 Ok(())
1317 }
1318 }
1319 */
1320}