vsec 0.0.1

Detect secrets in Rust codebases
Documentation
// src/scanner/indexer.rs

use rayon::prelude::*;
use std::path::{Path, PathBuf};

use crate::filters::BenignNameFilter;
use crate::registry::{ConstantDefinition, ConstantVisibility, SuspectRegistry};

/// Extract all public constant definitions from a codebase (Phase 1)
///
/// Non-private `const` and `static` items whose initializer can be decoded
/// to a string are recorded in the registry, unless the name filter rules
/// them out as benign.
pub struct Indexer {
    /// Destination for every definition that survives filtering.
    registry: SuspectRegistry,
    /// Name-based filter used both for the early benign check and for the
    /// preliminary score.
    name_filter: BenignNameFilter,
}

impl Indexer {
    /// Build an indexer with a fresh registry and a name filter created from
    /// its default configuration.
    ///
    /// # Panics
    ///
    /// Panics if the name filter cannot be constructed.
    pub fn new() -> Self {
        let registry = SuspectRegistry::new();
        let name_filter =
            BenignNameFilter::new(Default::default()).expect("Failed to create name filter");

        Self {
            registry,
            name_filter,
        }
    }

    /// Index every file in `files` in parallel (Phase 1).
    ///
    /// Consumes the indexer and returns its registry once all files have been
    /// visited. Files that cannot be read as text or parsed as Rust are
    /// skipped without reporting an error.
    pub fn index_files(self, files: &[PathBuf]) -> SuspectRegistry {
        files.par_iter().for_each(|source_path| {
            // Best effort: a read or parse failure simply yields nothing for this file.
            let parsed = std::fs::read_to_string(source_path)
                .ok()
                .and_then(|text| syn::parse_file(&text).ok());

            if let Some(ast) = parsed {
                self.extract_definitions(&ast, source_path);
            }
        });

        self.registry
    }

    /// Walk the top-level items of a parsed file and index every constant,
    /// static and inline module found in it.
    ///
    /// `path` is the file the AST was parsed from; it is stored on each
    /// recorded definition.
    fn extract_definitions(&self, file: &syn::File, path: &Path) {
        // Top-level items have an empty module path. `process_module_item`
        // already handles consts, statics and (recursively) inline modules,
        // and builds the first path segment correctly when the parent path is
        // empty, so the traversal logic lives in one place only.
        for item in &file.items {
            self.process_module_item(item, path, "");
        }
    }

    /// Record a `const` item in the registry when it is a plausible secret.
    ///
    /// The item is skipped when it is private, when its name is definitely
    /// benign, when its initializer cannot be decoded to a string, when the
    /// decoded value exceeds 1024 bytes, or when the name-based preliminary
    /// score is -50 or lower.
    fn process_const(&self, c: &syn::ItemConst, path: &Path, module_path: &str) {
        // Only non-private constants can be used across files; private ones
        // are handled by file-local analysis.
        let visibility = ConstantVisibility::from_syn(&c.vis);
        if visibility == ConstantVisibility::Private {
            return;
        }

        // Early exit for definitely benign names
        let name = c.ident.to_string();
        if self.name_filter.is_definitely_benign(&name) {
            return;
        }

        // Memory safety: don't index massive strings. Secrets are rarely
        // larger than 1KB.
        let value = match Self::extract_string_value(&c.expr) {
            Some(text) if text.len() <= 1024 => text,
            _ => return,
        };

        // Preliminary score based on the name alone
        let preliminary_score: i32 = self
            .name_filter
            .analyze(&name)
            .iter()
            .map(|factor| factor.contribution)
            .sum();

        // Anything at or below -50 is considered obviously benign
        if preliminary_score <= -50 {
            return;
        }

        self.registry.insert(ConstantDefinition {
            name,
            value,
            file: path.to_path_buf(),
            module_path: module_path.to_string(),
            line: c.ident.span().start().line as u32,
            visibility,
            preliminary_score,
        });
    }

    /// Record a `static` item in the registry when it is a plausible secret.
    ///
    /// Applies the same rules as for `const` items: private items, definitely
    /// benign names, undecodable initializers, values longer than 1024 bytes
    /// and preliminary scores of -50 or lower are all skipped.
    fn process_static(&self, s: &syn::ItemStatic, path: &Path, module_path: &str) {
        let visibility = ConstantVisibility::from_syn(&s.vis);
        if visibility == ConstantVisibility::Private {
            return;
        }

        let name = s.ident.to_string();
        if self.name_filter.is_definitely_benign(&name) {
            return;
        }

        // Undecodable or oversized (> 1024 bytes) values are not indexed
        let value = match Self::extract_string_value(&s.expr) {
            Some(text) if text.len() <= 1024 => text,
            _ => return,
        };

        // Score derived from the name alone
        let preliminary_score: i32 = self
            .name_filter
            .analyze(&name)
            .iter()
            .map(|factor| factor.contribution)
            .sum();

        if preliminary_score <= -50 {
            return;
        }

        self.registry.insert(ConstantDefinition {
            name,
            value,
            file: path.to_path_buf(),
            module_path: module_path.to_string(),
            line: s.ident.span().start().line as u32,
            visibility,
            preliminary_score,
        });
    }

    /// Index a single item that lives under `module_path`.
    ///
    /// Constants and statics are handed to their dedicated processors. Modules
    /// that carry inline content are descended into recursively, with the
    /// module's identifier appended to `module_path` using `::`. Every other
    /// kind of item is ignored.
    fn process_module_item(&self, item: &syn::Item, path: &Path, module_path: &str) {
        match item {
            syn::Item::Const(konst) => self.process_const(konst, path, module_path),
            syn::Item::Static(stat) => self.process_static(stat, path, module_path),
            syn::Item::Mod(module) => {
                // Modules without content have nothing to recurse into
                let children = match &module.content {
                    Some((_, children)) => children,
                    None => return,
                };

                // An empty parent path means the module sits at the file root
                let qualified = match module_path {
                    "" => module.ident.to_string(),
                    parent => format!("{}::{}", parent, module.ident),
                };

                children
                    .iter()
                    .for_each(|child| self.process_module_item(child, path, &qualified));
            }
            _ => {}
        }
    }

    /// Extract the string value of an expression when it can be determined
    /// from the syntax alone.
    ///
    /// Recognized shapes:
    /// - string literals (`"..."`) and byte-string literals (`b"..."`) that are valid UTF-8
    /// - references to arrays of integer/byte or char literals
    ///   (`&[112, 97, 115, 115]`, `&['p', 'a', 's', 's']`)
    /// - `vec![...]` of integer/byte literals
    /// - calls whose callee mentions `from_utf8`
    ///   (e.g. `String::from_utf8(vec![...])`), decoded from the first argument
    /// - any of the above wrapped in parentheses or a group
    ///
    /// Returns `None` for every other expression, or when the decoded bytes
    /// are not valid UTF-8.
    pub fn extract_string_value(expr: &syn::Expr) -> Option<String> {
        match expr {
            syn::Expr::Lit(lit) => match &lit.lit {
                syn::Lit::Str(s) => Some(s.value()),
                syn::Lit::ByteStr(s) => String::from_utf8(s.value()).ok(),
                _ => None,
            },
            // Handle referenced array literals like &[112, 97, 115, 115] or &['p', 'a']
            syn::Expr::Reference(r) => Self::try_decode_referenced_value(&r.expr),
            // Handle vec![...] macro patterns
            syn::Expr::Macro(mac) => {
                if mac
                    .mac
                    .path
                    .segments
                    .last()
                    .map(|s| s.ident == "vec")
                    .unwrap_or(false)
                {
                    Self::try_decode_vec_macro(&mac.mac.tokens)
                } else {
                    None
                }
            }
            // Handle String::from_utf8(vec![...]) patterns
            syn::Expr::Call(call) => {
                // `quote!` interpolates a bare `#ident` only: `#call.func` would
                // splice the entire call expression (arguments included) followed
                // by the literal tokens `. func`, making the check below match on
                // argument text as well. Bind the callee to a local first so that
                // only the called function's path is inspected.
                let callee = &call.func;
                let func_name = quote::quote!(#callee).to_string();
                if func_name.contains("from_utf8") {
                    call.args.first().and_then(Self::extract_string_value)
                } else {
                    None
                }
            }
            syn::Expr::Group(g) => Self::extract_string_value(&g.expr),
            syn::Expr::Paren(p) => Self::extract_string_value(&p.expr),
            _ => None,
        }
    }

    /// Decode an array expression made of literals into a string.
    ///
    /// Two interpretations are attempted in order:
    /// 1. every element is an integer literal fitting in a `u8` or a byte
    ///    literal, and the resulting bytes are valid UTF-8
    ///    (`[112, 97, 115, 115]`);
    /// 2. every element is a char literal (`['p', 'a', 's', 's']`).
    ///
    /// Returns `None` when `expr` is not an array or neither interpretation
    /// applies.
    fn try_decode_referenced_value(expr: &syn::Expr) -> Option<String> {
        let elements = match expr {
            syn::Expr::Array(array) => &array.elems,
            _ => return None,
        };

        // Interpretation 1: byte values
        let from_bytes = elements
            .iter()
            .map(|element| match element {
                syn::Expr::Lit(syn::ExprLit {
                    lit: syn::Lit::Int(number),
                    ..
                }) => number.base10_parse::<u8>().ok(),
                syn::Expr::Lit(syn::ExprLit {
                    lit: syn::Lit::Byte(byte),
                    ..
                }) => Some(byte.value()),
                _ => None,
            })
            .collect::<Option<Vec<u8>>>()
            .and_then(|raw| String::from_utf8(raw).ok());

        if from_bytes.is_some() {
            return from_bytes;
        }

        // Interpretation 2: char values; a single non-char element rules it out
        let mut text = String::new();
        for element in elements {
            match element {
                syn::Expr::Lit(syn::ExprLit {
                    lit: syn::Lit::Char(ch),
                    ..
                }) => text.push(ch.value()),
                _ => return None,
            }
        }
        Some(text)
    }

    /// Try to decode vec![...] with byte literals
    fn try_decode_vec_macro(tokens: &proc_macro2::TokenStream) -> Option<String> {
        use syn::parse::Parser;
        use syn::{Expr, ExprLit, Lit};

        // Try to parse as a punctuated list of expressions
        let parser = syn::punctuated::Punctuated::<Expr, syn::Token![,]>::parse_terminated;
        let exprs: syn::punctuated::Punctuated<Expr, syn::Token![,]> =
            match parser.parse2(tokens.clone()) {
                Ok(exprs) => exprs,
                Err(_) => return None,
            };

        // Extract byte values from integer literals
        let bytes: Option<Vec<u8>> = exprs
            .iter()
            .map(|expr| {
                if let Expr::Lit(ExprLit {
                    lit: Lit::Int(int), ..
                }) = expr
                {
                    int.base10_parse::<u8>().ok()
                } else if let Expr::Lit(ExprLit {
                    lit: Lit::Byte(b), ..
                }) = expr
                {
                    Some(b.value())
                } else {
                    None
                }
            })
            .collect();

        bytes.and_then(|b| String::from_utf8(b).ok())
    }
}

impl Default for Indexer {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs an already-parsed expression through the extractor.
    fn decode(expr: syn::Expr) -> Option<String> {
        Indexer::extract_string_value(&expr)
    }

    #[test]
    fn test_extract_string_literal() {
        let decoded = decode(syn::parse_quote!("hello"));
        assert_eq!(decoded.as_deref(), Some("hello"));
    }

    #[test]
    fn test_extract_byte_string() {
        let decoded = decode(syn::parse_quote!(b"hello"));
        assert_eq!(decoded.as_deref(), Some("hello"));
    }

    #[test]
    fn test_extract_byte_array() {
        // The integers are the ASCII codes of "hello"
        let decoded = decode(syn::parse_quote!(&[104, 101, 108, 108, 111]));
        assert_eq!(decoded.as_deref(), Some("hello"));
    }
}