ares/config/
mod.rs

1/// import general checker
2use lemmeknow::Identifier;
3use memmap2::Mmap;
4use once_cell::sync::OnceCell;
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7use std::fs::{self, File};
8use std::io::{self, BufRead, BufReader};
9use std::io::{Read, Write};
10use std::path::Path;
11
12/// Library input is the default API input
13/// The CLI turns its arguments into a LibraryInput struct
14/// The Config object is a default configuration object
15/// For the entire program
16/// It's access using a variable like configuration
17/// ```rust
18/// use ares::config::get_config;
19/// let config = get_config();
20/// assert_eq!(config.verbose, 0);
21/// ```
22#[derive(Serialize, Deserialize)]
23#[serde(default)]
24pub struct Config {
25    /// A level of verbosity to determine.
26    /// How much we print in logs.
27    pub verbose: u8,
28    /// The lemmeknow config to use
29    #[serde(skip)]
30    pub lemmeknow_config: Identifier,
31    /// lemmeknow_config serialization fields
32    #[serde(default)]
33    pub lemmeknow_min_rarity: f32,
34    /// Maximum rarity threshold for lemmeknow detection
35    #[serde(default)]
36    pub lemmeknow_max_rarity: f32,
37    /// List of lemmeknow tags to include in detection
38    #[serde(default)]
39    pub lemmeknow_tags: Vec<String>,
40    /// List of lemmeknow tags to exclude from detection
41    #[serde(default)]
42    pub lemmeknow_exclude_tags: Vec<String>,
43    /// Whether to use boundaryless mode in lemmeknow detection
44    #[serde(default)]
45    pub lemmeknow_boundaryless: bool,
46    /// Should the human checker be on?
47    /// This asks yes/no for plaintext. Turn off for API
48    pub human_checker_on: bool,
49    /// The timeout threshold before Ares quits
50    /// This is in seconds
51    pub timeout: u32,
52    /// Whether to collect all plaintexts until timeout expires
53    /// instead of exiting after finding the first valid plaintext
54    pub top_results: bool,
55    /// Is the program being run in API mode?
56    /// This is used to determine if we should print to stdout
57    /// Or return the values
58    pub api_mode: bool,
59    /// Regex enables the user to search for a specific regex or crib
60    pub regex: Option<String>,
61    /// Path to the wordlist file. Will be overridden by CLI argument if provided.
62    pub wordlist_path: Option<String>,
63    /// Wordlist data structure (loaded from file). CLI takes precedence if both config and CLI specify a wordlist.
64    #[serde(skip)]
65    pub wordlist: Option<HashSet<String>>,
66    /// Colourscheme hashmap
67    pub colourscheme: HashMap<String, String>,
68    /// Enables enhanced plaintext detection using a BERT model.
69    pub enhanced_detection: bool,
70    /// Path to the enhanced detection model. If None, will use the default path.
71    pub model_path: Option<String>,
72}
73
74/// Cell for storing global Config
75static CONFIG: OnceCell<Config> = OnceCell::new();
76
77/// To initialize global config with custom values
78pub fn set_global_config(config: Config) {
79    CONFIG.set(config).ok(); // ok() used to make compiler happy about using Result
80}
81
82/// Get the global config.
83/// This will return default config if the config wasn't already initialized
84pub fn get_config() -> &'static Config {
85    CONFIG.get_or_init(Config::default)
86}
87
88/// Creates a default lemmeknow config
89const LEMMEKNOW_DEFAULT_CONFIG: Identifier = Identifier {
90    min_rarity: 0.0_f32,
91    max_rarity: 0.0_f32,
92    tags: vec![],
93    exclude_tags: vec![],
94    file_support: false,
95    boundaryless: false,
96};
97
98/// Convert Config fields into an Identifier
99fn make_identifier_from_config(config: &Config) -> Identifier {
100    Identifier {
101        min_rarity: config.lemmeknow_min_rarity,
102        max_rarity: config.lemmeknow_max_rarity,
103        tags: config.lemmeknow_tags.clone(),
104        exclude_tags: config.lemmeknow_exclude_tags.clone(),
105        file_support: false, // Always false as per LEMMEKNOW_DEFAULT_CONFIG
106        boundaryless: config.lemmeknow_boundaryless,
107    }
108}
109
110/// Update Config's Identifier field from its serialization fields
111fn update_identifier_in_config(config: &mut Config) {
112    config.lemmeknow_config = make_identifier_from_config(config);
113}
114
115impl Default for Config {
116    fn default() -> Self {
117        let mut config = Config {
118            verbose: 0,
119            lemmeknow_config: LEMMEKNOW_DEFAULT_CONFIG,
120            lemmeknow_min_rarity: 0.0_f32,
121            lemmeknow_max_rarity: 0.0_f32,
122            lemmeknow_tags: vec![],
123            lemmeknow_exclude_tags: vec![],
124            lemmeknow_boundaryless: false,
125            human_checker_on: false,
126            timeout: 5,
127            top_results: false,
128            api_mode: false,
129            regex: None,
130            wordlist_path: None,
131            wordlist: None,
132            enhanced_detection: false,
133            model_path: None,
134            colourscheme: HashMap::new(),
135        };
136
137        // Set default colors
138        config
139            .colourscheme
140            .insert(String::from("informational"), String::from("255,215,0")); // Gold yellow
141        config
142            .colourscheme
143            .insert(String::from("warning"), String::from("255,0,0")); // Red
144        config
145            .colourscheme
146            .insert(String::from("success"), String::from("0,255,0")); // Green
147        config
148            .colourscheme
149            .insert(String::from("error"), String::from("255,0,0")); // Red
150
151        config
152            .colourscheme
153            .insert(String::from("question"), String::from("255,215,0")); // Gold yellow (same as informational)
154        config
155    }
156}
157
158/// Get the path to the Ares config file
159///
160/// # Panics
161///
162/// This function will panic if:
163/// - The home directory cannot be found
164/// - The Ares directory cannot be created
165pub fn get_config_file_path() -> std::path::PathBuf {
166    let mut path = dirs::home_dir().expect("Could not find home directory");
167    path.push(".ares");
168    fs::create_dir_all(&path).expect("Could not create Ares directory");
169    path.push("config.toml");
170    path
171}
172
173/// Create a default config file at the specified path
174///
175/// # Panics
176///
177/// This function will panic if:
178/// - The config cannot be serialized to TOML
179/// - The config file path cannot be determined (see `get_config_file_path`)
180pub fn create_default_config_file() -> std::io::Result<()> {
181    let config = Config::default();
182    let toml_string = toml::to_string_pretty(&config).expect("Could not serialize config");
183    let path = get_config_file_path();
184    let mut file = File::create(path)?;
185    file.write_all(toml_string.as_bytes())?;
186    Ok(())
187}
188
189/// Read and parse the config file
190fn read_config_file() -> std::io::Result<String> {
191    let path = get_config_file_path();
192    let mut file = File::open(&path)?;
193    let mut contents = String::new();
194    file.read_to_string(&mut contents)?;
195    Ok(contents)
196}
197
198/// Parse a TOML string into a Config struct, handling unknown keys
199fn parse_toml_with_unknown_keys(contents: &str) -> Config {
200    // First parse into a generic Value to check for unknown keys
201    let parsed_value: toml::Value = toml::from_str(contents).expect("Could not parse config file");
202
203    // Check for unknown keys at the root level
204    if let toml::Value::Table(table) = &parsed_value {
205        let known_keys = [
206            "verbose",
207            "lemmeknow_min_rarity",
208            "enhanced_detection",
209            "model_path",
210            "lemmeknow_max_rarity",
211            "lemmeknow_tags",
212            "lemmeknow_exclude_tags",
213            "lemmeknow_boundaryless",
214            "human_checker_on",
215            "timeout",
216            "top_results",
217            "api_mode",
218            "regex",
219            "wordlist_path",
220            "question",
221            "colourscheme",
222        ];
223        for key in table.keys() {
224            if !known_keys.contains(&key.as_str()) {
225                crate::cli_pretty_printing::warning_unknown_config_key(key);
226            }
227        }
228    }
229
230    // Parse into Config struct
231    let mut config: Config = toml::from_str(contents).expect("Could not parse config file");
232    update_identifier_in_config(&mut config);
233    config
234}
235
236/// Loads a wordlist from a file into a HashSet for efficient lookups
237/// Uses memory mapping for large files to improve performance and memory usage
238///
239/// # Arguments
240/// * `path` - Path to the wordlist file
241///
242/// # Returns
243/// * `Ok(HashSet<String>)` - The loaded wordlist as a HashSet for O(1) lookups
244/// * `Err(io::Error)` - If the file cannot be opened or read
245///
246/// # Errors
247/// This function will return an error if:
248/// * The file does not exist
249/// * The file cannot be opened due to permissions
250/// * The file cannot be memory-mapped
251/// * The file contains invalid UTF-8 characters
252///
253/// # Safety
254/// This implementation uses unsafe code in two places:
255/// 1. Memory mapping (unsafe { Mmap::map(&file) }):
256///    - This is unsafe because the memory map could become invalid if the underlying file is modified
257///    - We accept this risk since the wordlist is only loaded once at startup and not expected to change
258///
259/// 2. UTF-8 conversion (unsafe { std::str::from_utf8_unchecked(&mmap) }):
260///    - This is unsafe because it assumes the file contains valid UTF-8
261///    - We attempt to convert to UTF-8 first and panic if invalid, making this assumption safe
262///    - The unchecked version is used for performance since we verify UTF-8 validity first
263pub fn load_wordlist<P: AsRef<Path>>(path: P) -> io::Result<HashSet<String>> {
264    let file = File::open(path)?;
265    let file_size = file.metadata()?.len();
266
267    // For small files (under 10MB), use regular file reading
268    // This threshold was chosen because:
269    // 1. Most wordlists under 10MB can be loaded quickly with minimal memory overhead
270    // 2. Memory mapping has overhead that may not be worth it for small files
271    // 3. 10MB allows for roughly 1 million words (assuming average word length of 10 chars)
272    if file_size < 10_000_000 {
273        // 10MB threshold
274        let reader = BufReader::new(file);
275        let mut wordlist = HashSet::new();
276
277        for line in reader.lines() {
278            if let Ok(word) = line {
279                let trimmed = word.trim().to_string();
280                if !trimmed.is_empty() {
281                    wordlist.insert(trimmed);
282                }
283            }
284        }
285
286        Ok(wordlist)
287    } else {
288        // For large files, use memory mapping
289        // First create the memory map
290        let mmap = unsafe { Mmap::map(&file)? };
291
292        // Verify the file contains valid UTF-8 before proceeding
293        if std::str::from_utf8(&mmap).is_err() {
294            panic!("Wordlist file contains invalid UTF-8");
295        }
296
297        // Now we can safely use from_utf8_unchecked since we verified it's valid UTF-8
298        let mut wordlist = HashSet::new();
299        let content = unsafe { std::str::from_utf8_unchecked(&mmap) };
300        for line in content.lines() {
301            let trimmed = line.trim();
302            if !trimmed.is_empty() {
303                wordlist.insert(trimmed.to_string());
304            }
305        }
306
307        Ok(wordlist)
308    }
309}
310
311/// Get configuration from file or create default if it doesn't exist
312pub fn get_config_file_into_struct() -> Config {
313    let path = get_config_file_path();
314
315    if !path.exists() {
316        // First run - get user preferences
317        let first_run_config = crate::cli::run_first_time_setup();
318        let mut config = Config::default();
319
320        // Extract color scheme values
321        config.colourscheme = first_run_config
322            .iter()
323            .filter(|(k, _)| !k.starts_with("wordlist") && *k != "timeout")
324            .map(|(k, v)| (k.clone(), v.clone()))
325            .collect();
326
327        // Set timeout if present
328        if let Some(timeout) = first_run_config.get("timeout") {
329            config.timeout = timeout.parse().unwrap_or(5);
330        }
331
332        // Extract wordlist path if present
333        if let Some(wordlist_path) = first_run_config.get("wordlist_path") {
334            config.wordlist_path = Some(wordlist_path.clone());
335
336            // Load the wordlist
337            match load_wordlist(wordlist_path) {
338                Ok(wordlist) => {
339                    config.wordlist = Some(wordlist);
340                }
341                Err(e) => {
342                    eprintln!(
343                        "Warning: Could not load wordlist at '{}': {}",
344                        wordlist_path, e
345                    );
346                    // Don't exit - just continue without the wordlist
347                }
348            }
349        }
350
351        // Save the config to file
352        save_config_to_file(&config, &path);
353        config
354    } else {
355        // Existing config - read and parse it
356        match read_config_file() {
357            Ok(contents) => {
358                let mut config = parse_toml_with_unknown_keys(&contents);
359
360                // If wordlist is specified in config file, set it in the config struct
361                if let Some(wordlist_path) = &config.wordlist_path {
362                    // Load the wordlist here in the config layer
363                    match load_wordlist(wordlist_path) {
364                        Ok(wordlist) => {
365                            config.wordlist = Some(wordlist);
366                        }
367                        Err(_e) => {
368                            // Critical error - exit if config specifies wordlist but can't load it
369                            eprintln!("Can't load wordlist at '{}'. Either fix or remove wordlist from config file at '{}'", 
370                                wordlist_path, path.display());
371                            std::process::exit(1);
372                        }
373                    }
374                }
375
376                config
377            }
378            Err(e) => {
379                eprintln!("Error reading config file: {}. Using defaults.", e);
380                Config::default()
381            }
382        }
383    }
384}
385
386/// Save a Config struct to a file
387fn save_config_to_file(config: &Config, path: &std::path::Path) {
388    let toml_string = toml::to_string_pretty(config).expect("Could not serialize config");
389    let mut file = File::create(path).expect("Could not create config file");
390    file.write_all(toml_string.as_bytes())
391        .expect("Could not write to config file");
392}