vsec 0.0.1

Detect secrets in Rust codebases
Documentation
// src/registry/mod.rs

pub mod definition;

use dashmap::DashMap;
use std::path::Path;
use std::sync::Arc;

pub use definition::{ConstantDefinition, ConstantVisibility, RegistryStats};

/// Thread-safe registry of constant definitions across all files.
///
/// Both maps are `DashMap`s behind an `Arc`, so the registry can be populated
/// through `&self` from several threads during parallel scanning.
/// NOTE(review): `DashMap` is understood to be a sharded, lock-based concurrent
/// map rather than a strictly lock-free structure — confirm against the
/// dashmap version in use before relying on lock-freedom.
///
/// Keys are fully qualified (file::module::name) to avoid namespace collisions
/// between constants with the same name in different modules/files.
pub struct SuspectRegistry {
    /// Primary storage: qualified_key -> ConstantDefinition
    inner: Arc<DashMap<String, ConstantDefinition>>,
    /// Secondary index: name -> Vec<qualified_key> for fast name-based lookups
    by_name: Arc<DashMap<String, Vec<String>>>,
}

impl SuspectRegistry {
    /// Build a registry that holds no definitions and has an empty name index.
    pub fn new() -> Self {
        let inner = Arc::new(DashMap::new());
        let by_name = Arc::new(DashMap::new());
        Self { inner, by_name }
    }

    /// Register a constant definition.
    ///
    /// The definition is stored under its qualified key (replacing any entry
    /// already stored under that key), and the key is recorded once in the
    /// per-name index so that name-based lookups can find it.
    pub fn insert(&self, def: ConstantDefinition) {
        let key = def.qualified_key();
        let short_name = def.name.clone();

        // Primary storage first: the qualified key identifies the definition.
        self.inner.insert(key.clone(), def);

        // Then the name index; re-inserting a known key must not duplicate it.
        let mut bucket = self.by_name.entry(short_name).or_default();
        if !bucket.contains(&key) {
            bucket.push(key);
        }
    }

    /// Resolve the value of a constant from its bare name.
    ///
    /// Thin wrapper over [`Self::get`]: the value of the selected definition is
    /// returned, i.e. the only definition with that name, or the highest-scored
    /// one when several share it. No file is taken into account here; use
    /// [`Self::get_value_for_file`] for that. Returns `None` for unknown names.
    pub fn get_value(&self, name: &str) -> Option<String> {
        let def = self.get(name)?;
        Some(def.value)
    }

    /// Look up a definition by its bare name.
    ///
    /// * No definition registered under `name`: `None`.
    /// * Exactly one: that definition.
    /// * Several: the one with the highest `preliminary_score` (conservative
    ///   choice: the more suspicious definition is more likely a real issue).
    ///   On equal scores the candidate indexed last wins, as with
    ///   `Iterator::max_by_key`.
    ///
    /// This lookup is file-agnostic; [`Self::get_for_file`] prefers definitions
    /// from a given file.
    pub fn get(&self, name: &str) -> Option<ConstantDefinition> {
        let qualified_keys = self.by_name.get(name)?;

        // Zero or one candidate needs no special casing: picking the maximum of
        // an empty set yields `None`, and of a single element yields it.
        let candidates = qualified_keys
            .iter()
            .filter_map(|key| self.inner.get(key))
            .map(|entry| entry.value().clone());
        candidates.max_by_key(|def| def.preliminary_score)
    }

    /// Resolve the value of a constant by name, favouring the given file.
    ///
    /// Thin wrapper over [`Self::get_for_file`] that returns only the value of
    /// the selected definition. Use it when the caller knows which file is
    /// being analysed. Returns `None` for unknown names.
    pub fn get_value_for_file(&self, name: &str, file: &Path) -> Option<String> {
        let def = self.get_for_file(name, file)?;
        Some(def.value)
    }

    /// Look up a constant by name, preferring a definition from `file`.
    ///
    /// Resolution order:
    /// 1. the first indexed definition whose recorded `file` equals `file`;
    /// 2. otherwise the definition with the highest `preliminary_score`
    ///    (so a unique definition is returned whatever file it lives in).
    ///
    /// The same-file test compares the stored path with `file` directly instead
    /// of string-matching the qualified key, so it depends neither on how keys
    /// are formatted nor on lossy path display. Paths are compared as given:
    /// pass `file` in the same form that was used when the definition was
    /// registered.
    ///
    /// Returns `None` when no definition named `name` is registered.
    pub fn get_for_file(&self, name: &str, file: &Path) -> Option<ConstantDefinition> {
        let keys = self.by_name.get(name)?;

        // Same-file definitions win over everything else.
        let same_file = keys.iter().find_map(|key| {
            let entry = self.inner.get(key)?;
            if entry.file.as_path() == file {
                Some(entry.value().clone())
            } else {
                None
            }
        });
        if same_file.is_some() {
            return same_file;
        }

        // No same-file match - fall back to the highest-scored definition.
        // This also covers the empty and single-definition cases.
        let candidates = keys
            .iter()
            .filter_map(|key| self.inner.get(key))
            .map(|entry| entry.value().clone());
        candidates.max_by_key(|def| def.preliminary_score)
    }

    /// Collect every definition registered under `name` (diagnostics/debugging).
    ///
    /// Definitions come back in index order; an unknown name yields an empty
    /// vector. Index entries whose definition is missing from primary storage
    /// are skipped.
    pub fn get_all_by_name(&self, name: &str) -> Vec<ConstantDefinition> {
        let qualified_keys = match self.by_name.get(name) {
            Some(keys) => keys,
            None => return Vec::new(),
        };

        let mut found = Vec::new();
        for key in qualified_keys.iter() {
            if let Some(entry) = self.inner.get(key) {
                found.push(entry.value().clone());
            }
        }
        found
    }

    /// Report whether at least one definition has been indexed under `name`.
    pub fn contains(&self, name: &str) -> bool {
        self.by_name.get(name).is_some()
    }

    /// Summarise the registry contents in a single pass.
    ///
    /// * `total`: number of stored definitions.
    /// * `public`: definitions whose visibility is `ConstantVisibility::Public`.
    /// * `suspicious`: definitions whose `preliminary_score` is strictly
    ///   greater than 30.
    pub fn stats(&self) -> RegistryStats {
        let (total, public, suspicious) =
            self.inner
                .iter()
                .fold((0, 0, 0), |(seen, exposed, flagged), entry| {
                    let is_public = entry.visibility == ConstantVisibility::Public;
                    let is_suspicious = entry.preliminary_score > 30;
                    (
                        seen + 1,
                        exposed + if is_public { 1 } else { 0 },
                        flagged + if is_suspicious { 1 } else { 0 },
                    )
                });

        RegistryStats {
            total,
            public,
            suspicious,
        }
    }

    /// Get the number of entries in the registry.
    ///
    /// This is the size of the primary storage, i.e. one entry per qualified
    /// key, not the number of distinct constant names.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Report whether the registry holds no definitions at all.
    pub fn is_empty(&self) -> bool {
        self.inner.len() == 0
    }

    /// Iterate over all entries
    pub fn iter(&self) -> impl Iterator<Item = ConstantDefinition> + '_ {
        self.inner.iter().map(|entry| entry.value().clone())
    }
}

impl Default for SuspectRegistry {
    fn default() -> Self {
        Self::new()
    }
}

/// Cloning yields another handle to the same underlying maps: only the `Arc`
/// reference counts are bumped, so inserts made through either handle are
/// visible through both.
impl Clone for SuspectRegistry {
    fn clone(&self) -> Self {
        let inner = Arc::clone(&self.inner);
        let by_name = Arc::clone(&self.by_name);
        Self { inner, by_name }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Baseline fixture: a public constant in the root module, on line 1, with
    /// a preliminary score of 10. Tests override only the fields they care
    /// about via struct-update syntax.
    fn template(name: &str, value: &str, file: &str) -> ConstantDefinition {
        ConstantDefinition {
            name: name.into(),
            value: value.into(),
            file: PathBuf::from(file),
            module_path: String::new(),
            line: 1,
            visibility: ConstantVisibility::Public,
            preliminary_score: 10,
        }
    }

    #[test]
    fn test_insert_and_get() {
        let registry = SuspectRegistry::new();

        registry.insert(ConstantDefinition {
            line: 10,
            preliminary_score: 50,
            ..template("SECRET_KEY", "abc123", "src/lib.rs")
        });

        assert!(registry.contains("SECRET_KEY"));
        assert_eq!(registry.get_value("SECRET_KEY"), Some("abc123".into()));
    }

    #[test]
    fn test_same_name_different_files_both_stored() {
        let registry = SuspectRegistry::new();

        let low_scored = template("TOKEN", "value_from_a", "a.rs");
        let high_scored = ConstantDefinition {
            preliminary_score: 50,
            ..template("TOKEN", "value_from_b", "b.rs")
        };

        registry.insert(low_scored);
        registry.insert(high_scored);

        // Both definitions should be stored (no collision)
        assert_eq!(registry.len(), 2);

        // get_all_by_name returns both
        let all = registry.get_all_by_name("TOKEN");
        assert_eq!(all.len(), 2);

        // get_value returns highest scored when ambiguous
        assert_eq!(registry.get_value("TOKEN"), Some("value_from_b".into()));
    }

    #[test]
    fn test_get_for_file_prefers_same_file() {
        let registry = SuspectRegistry::new();

        let in_a = template("API_KEY", "key_from_a", "a.rs");
        let in_b = ConstantDefinition {
            preliminary_score: 50, // Higher score, but not in same file
            ..template("API_KEY", "key_from_b", "b.rs")
        };

        registry.insert(in_a);
        registry.insert(in_b);

        // When looking up from file a.rs, prefer the definition from a.rs
        assert_eq!(
            registry.get_value_for_file("API_KEY", Path::new("a.rs")),
            Some("key_from_a".into())
        );

        // When looking up from file b.rs, prefer the definition from b.rs
        assert_eq!(
            registry.get_value_for_file("API_KEY", Path::new("b.rs")),
            Some("key_from_b".into())
        );

        // When looking up from an unrelated file, return highest scored
        assert_eq!(
            registry.get_value_for_file("API_KEY", Path::new("c.rs")),
            Some("key_from_b".into())
        );
    }

    #[test]
    fn test_module_path_in_key() {
        let registry = SuspectRegistry::new();

        let outer = template("SECRET", "outer", "lib.rs");
        let nested = ConstantDefinition {
            module_path: "inner".into(),
            line: 10,
            preliminary_score: 20,
            ..template("SECRET", "inner", "lib.rs")
        };

        registry.insert(outer);
        registry.insert(nested);

        // Both are stored (different module paths)
        assert_eq!(registry.len(), 2);

        let all = registry.get_all_by_name("SECRET");
        assert_eq!(all.len(), 2);
    }
}