clapfig 0.18.2 - Docs.rs

//! Strict-mode validation: detect unknown keys in config files.
//!
//! Operates on an already-parsed [`toml::Table`] so it sees exactly the same
//! keys that will reach the merge step. When kebab-case normalization is
//! enabled the table arrives with `-` already rewritten to `_`, and the
//! line-number lookup is taught to match keys regardless of dash/underscore
//! spelling so error messages still point at the user's original line.

use std::path::Path;
use std::sync::Arc;

use confique::Config;
use serde::Deserialize;
use toml::{Table, Value};

use crate::error::{ClapfigError, UnknownKeyInfo};
use crate::normalize::normalize_key;

/// Validate that `table` contains no keys unknown to config type `C`.
///
/// `source` is the original TOML file text — retained only so the resulting
/// [`UnknownKeyInfo`] can carry a snippet and a 1-indexed line number.
/// `normalize_keys` controls whether the line-number lookup treats `-` and
/// `_` as interchangeable (matching how the table itself was produced).
pub fn validate_unknown_keys<C: Config>(
    table: &Table,
    source: &str,
    path: &Path,
    normalize_keys: bool,
) -> Result<(), ClapfigError>
where
    C::Layer: for<'de> Deserialize<'de>,
{
    let mut unknown_keys: Vec<String> = Vec::new();

    let value = Value::Table(table.clone());
    let _layer: C::Layer = serde_ignored::deserialize(value, |ignored_path| {
        unknown_keys.push(ignored_path.to_string());
    })
    .map_err(|e| ClapfigError::ParseError {
        path: path.to_path_buf(),
        source: Box::new(e),
        source_text: Some(Arc::from(source)),
    })?;

    if unknown_keys.is_empty() {
        return Ok(());
    }

    let source_arc: Arc<str> = Arc::from(source);
    let infos: Vec<UnknownKeyInfo> = unknown_keys
        .into_iter()
        .map(|key| {
            let line = find_key_line(source, &key, normalize_keys);
            UnknownKeyInfo {
                key,
                path: path.to_path_buf(),
                line,
                source: Some(Arc::clone(&source_arc)),
            }
        })
        .collect();

    Err(ClapfigError::UnknownKeys(infos))
}

/// Find the 1-indexed line number for a key in TOML content.
///
/// For a dotted key like `"database.typo"`, tracks the current `[section]` header
/// while scanning and only matches the leaf key when inside the correct section.
///
/// When `normalize_keys` is true, the comparison treats `-` and `_` as the same
/// character — so a normalized lookup key like `"pool_size"` still locates a
/// source line that reads `pool-size = 5`.
///
/// This is a best-effort heuristic — it handles standard `[section]` headers and
/// bare key assignments but does not handle quoted keys or inline tables.
/// Returns 0 if the key cannot be located.
fn find_key_line(content: &str, dotted_key: &str, normalize_keys: bool) -> usize {
    let segments: Vec<&str> = dotted_key.split('.').collect();
    let leaf = segments.last().unwrap_or(&dotted_key);
    let expected_section = &segments[..segments.len() - 1]; // empty for top-level

    let mut current_section: Vec<String> = Vec::new();

    for (i, line) in content.lines().enumerate() {
        let trimmed = line.trim();

        // Track [section] headers
        if trimmed.starts_with('[') && !trimmed.starts_with("[[") {
            let header = trimmed.trim_start_matches('[').trim_end_matches(']').trim();
            current_section = header.split('.').map(|s| s.trim().to_string()).collect();
            continue;
        }

        // Check if we're in the right section
        let in_right_section = expected_section.len() == current_section.len()
            && expected_section
                .iter()
                .zip(&current_section)
                .all(|(a, b)| keys_match(a, b, normalize_keys));

        if !in_right_section {
            continue;
        }

        // Manually pull "<key> = ..." so we can compare the key under the
        // normalization rule rather than relying on a literal prefix match.
        if let Some((candidate, rest)) = trimmed.split_once('=')
            && keys_match(candidate.trim_end(), leaf, normalize_keys)
            && !rest.is_empty()
        {
            return i + 1;
        }
    }
    0
}

/// Compare two keys for equality, optionally treating `-` and `_` as the
/// same character. The trim allows callers to hand in raw section/key
/// fragments without pre-trimming.
fn keys_match(a: &str, b: &str, normalize_keys: bool) -> bool {
    let a = a.trim();
    let b = b.trim();
    if normalize_keys {
        normalize_key(a) == normalize_key(b)
    } else {
        a == b
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::fixtures::test::TestConfig;
    use std::path::PathBuf;

    fn path() -> PathBuf {
        PathBuf::from("/test/config.toml")
    }

    fn parse(content: &str) -> Table {
        content.parse::<Table>().unwrap()
    }

    #[test]
    fn valid_config_passes() {
        let content = r#"
host = "0.0.0.0"
port = 3000
debug = true

[database]
url = "postgres://localhost"
pool_size = 10
"#;
        let result = validate_unknown_keys::<TestConfig>(&parse(content), content, &path(), false);
        assert!(result.is_ok());
    }

    #[test]
    fn unknown_top_level_key() {
        let content = "host = \"localhost\"\ntypo_key = 42\n";
        let result = validate_unknown_keys::<TestConfig>(&parse(content), content, &path(), false);
        let err = result.unwrap_err();
        let keys = err.unknown_keys().expect("expected UnknownKeys");
        assert_eq!(keys.len(), 1);
        assert_eq!(keys[0].key, "typo_key");
        assert_eq!(keys[0].line, 2);
        assert!(keys[0].source.is_some());
    }

    #[test]
    fn unknown_nested_key() {
        let content = "[database]\nurl = \"pg://\"\ntypo = \"bad\"\n";
        let result = validate_unknown_keys::<TestConfig>(&parse(content), content, &path(), false);
        let err = result.unwrap_err();
        let keys = err.unknown_keys().expect("expected UnknownKeys");
        assert_eq!(keys.len(), 1);
        assert_eq!(keys[0].key, "database.typo");
        assert_eq!(keys[0].leaf(), "typo");
    }

    #[test]
    fn multiple_unknown_keys() {
        let content = "typo1 = 1\ntypo2 = 2\n";
        let result = validate_unknown_keys::<TestConfig>(&parse(content), content, &path(), false);
        let err = result.unwrap_err();
        let keys = err.unknown_keys().expect("expected UnknownKeys");
        assert_eq!(keys.len(), 2);
    }

    #[test]
    fn line_number_accuracy() {
        let content = "host = \"x\"\nport = 8080\ndebug = false\n\n# comment\nbad_key = 1\n";
        let result = validate_unknown_keys::<TestConfig>(&parse(content), content, &path(), false);
        let err = result.unwrap_err();
        let keys = err.unknown_keys().expect("expected UnknownKeys");
        assert_eq!(keys[0].line, 6);
    }

    #[test]
    fn empty_content_ok() {
        let table = Table::new();
        let result = validate_unknown_keys::<TestConfig>(&table, "", &path(), false);
        assert!(result.is_ok());
    }

    #[test]
    fn known_optional_field_ok() {
        let content = "[database]\nurl = \"pg://\"\n";
        let result = validate_unknown_keys::<TestConfig>(&parse(content), content, &path(), false);
        assert!(result.is_ok());
    }

    #[test]
    fn sparse_config_ok() {
        let content = "port = 3000\n";
        let result = validate_unknown_keys::<TestConfig>(&parse(content), content, &path(), false);
        assert!(result.is_ok());
    }

    #[test]
    fn error_includes_file_path() {
        let content = "typo = 1\n";
        let p = PathBuf::from("/home/user/.config/myapp/config.toml");
        let err =
            validate_unknown_keys::<TestConfig>(&parse(content), content, &p, false).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("config.toml") || msg.contains("Unknown keys"));
    }

    #[test]
    fn line_number_finds_correct_section_for_duplicate_leaf() {
        let content = "host = \"x\"\nport = 8080\n[database]\ntypo = \"bad\"\n";
        let result = validate_unknown_keys::<TestConfig>(&parse(content), content, &path(), false);
        let err = result.unwrap_err();
        let keys = err.unknown_keys().expect("expected UnknownKeys");
        assert_eq!(keys[0].key, "database.typo");
        assert_eq!(keys[0].line, 4);
    }

    #[test]
    fn line_number_top_level_not_confused_by_nested_same_name() {
        let content = "typo = 99\n[database]\npool_size = 5\n";
        let result = validate_unknown_keys::<TestConfig>(&parse(content), content, &path(), false);
        let err = result.unwrap_err();
        let keys = err.unknown_keys().expect("expected UnknownKeys");
        assert_eq!(keys[0].key, "typo");
        assert_eq!(keys[0].line, 1);
    }

    // -- normalize_keys = true ------------------------------------------------

    use crate::normalize::normalize_table;

    fn parse_and_normalize(content: &str) -> Table {
        let mut t = parse(content);
        normalize_table(&mut t).expect("test fixtures must not contain collisions");
        t
    }

    #[test]
    fn normalize_kebab_top_level_key_is_valid() {
        // `pool_size` isn't a top-level field but `host` is — exercise the
        // happy path where a kebab key normalizes to a known snake_case field.
        // TestConfig has `host` (no dashes available), so use a synthetic case
        // through nested database.pool-size — see the next test for the real
        // pool_size case.
        let content = "host = \"x\"\n";
        let table = parse_and_normalize(content);
        let result = validate_unknown_keys::<TestConfig>(&table, content, &path(), true);
        assert!(result.is_ok());
    }

    #[test]
    fn normalize_kebab_nested_key_is_valid() {
        // `pool-size` in source → `pool_size` after normalize_table → matches
        // the `pool_size` field on TestDbConfig.
        let content = "[database]\npool-size = 25\n";
        let table = parse_and_normalize(content);
        let result = validate_unknown_keys::<TestConfig>(&table, content, &path(), true);
        assert!(result.is_ok(), "kebab key should be accepted: {result:?}");
    }

    #[test]
    fn normalize_kebab_typo_reports_line_at_kebab_source() {
        // User typed a kebab-cased typo. After normalize, the reported key is
        // in snake form. The line-number lookup must still locate the kebab
        // line in the original source.
        let content = "host = \"x\"\n[database]\npool-zize = 99\n";
        let table = parse_and_normalize(content);
        let result = validate_unknown_keys::<TestConfig>(&table, content, &path(), true);
        let err = result.unwrap_err();
        let keys = err.unknown_keys().expect("expected UnknownKeys");
        assert_eq!(keys.len(), 1);
        // The reported key is in normalized (snake) form …
        assert_eq!(keys[0].key, "database.pool_zize");
        // … but the line still points at the kebab line in the original file.
        assert_eq!(keys[0].line, 3);
    }

    #[test]
    fn normalize_kebab_section_header_resolves_line() {
        // Section header itself is kebab in the source. `find_key_line` must
        // match it against the normalized expected section name.
        let content = "[my-section]\nfoo = 1\n";
        let table = parse_and_normalize(content);
        // `my-section` isn't a known field; we just want to confirm the
        // unknown-key lookup found a line (non-zero) using kebab matching on
        // the section header.
        let err = validate_unknown_keys::<TestConfig>(&table, content, &path(), true).unwrap_err();
        let keys = err.unknown_keys().expect("expected UnknownKeys");
        // Top-level `my_section` is the unknown key here.
        assert!(keys.iter().any(|k| k.key == "my_section"));
    }

    #[test]
    fn normalize_off_treats_kebab_as_unknown() {
        // Sanity check: with normalization disabled, `pool-size` still fails
        // strict validation the way it always has.
        let content = "[database]\npool-size = 25\n";
        let table = parse(content);
        let result = validate_unknown_keys::<TestConfig>(&table, content, &path(), false);
        assert!(result.is_err());
    }
}