use rayon::prelude::*;
use std::path::{Path, PathBuf};
use crate::filters::BenignNameFilter;
use crate::registry::{ConstantDefinition, ConstantVisibility, SuspectRegistry};
/// Scans parsed Rust source files for string-valued `const` / `static` items
/// and records the ones that pass the name and score filters.
pub struct Indexer {
/// Destination for every accepted definition; returned to the caller by
/// `index_files`.
registry: SuspectRegistry,
/// Name-based filter used both to reject items outright
/// (`is_definitely_benign`) and to compute the preliminary score (`analyze`).
name_filter: BenignNameFilter,
}
impl Indexer {
/// Builds an indexer with an empty registry and a name filter created from
/// the filter's default configuration.
///
/// # Panics
///
/// Panics if `BenignNameFilter::new` returns an error for the default
/// configuration.
pub fn new() -> Self {
    let registry = SuspectRegistry::new();
    let name_filter =
        BenignNameFilter::new(Default::default()).expect("Failed to create name filter");
    Self {
        registry,
        name_filter,
    }
}
/// Indexes every file in `files` and returns the populated registry.
///
/// Files are visited through rayon's parallel iterator. A file that cannot be
/// read as UTF-8 text, or that `syn` cannot parse, is skipped without any
/// report. The indexer is consumed so the registry can be moved out.
pub fn index_files(self, files: &[PathBuf]) -> SuspectRegistry {
    files.par_iter().for_each(|source_path| {
        // Best-effort: unreadable files are simply not indexed.
        let text = match std::fs::read_to_string(source_path) {
            Ok(text) => text,
            Err(_) => return,
        };
        // Likewise for files that are not syntactically valid Rust.
        let parsed = match syn::parse_file(&text) {
            Ok(parsed) => parsed,
            Err(_) => return,
        };
        self.extract_definitions(&parsed, source_path);
    });
    self.registry
}
/// Processes every top-level item of a parsed file.
///
/// Top-level items live in the crate-relative module path `""`, so each one
/// is handed to `process_module_item` with an empty path; that method handles
/// `const`, `static` and inline `mod` items (recursing into the latter) and
/// ignores everything else.
fn extract_definitions(&self, file: &syn::File, path: &Path) {
    let root_module = "";
    file.items
        .iter()
        .for_each(|top_level| self.process_module_item(top_level, path, root_module));
}
/// Considers a `const` item for indexing; see `index_string_item` for the
/// filtering rules.
fn process_const(&self, c: &syn::ItemConst, path: &Path, module_path: &str) {
    self.index_string_item(&c.vis, &c.ident, &c.expr, path, module_path);
}

/// Considers a `static` item for indexing; see `index_string_item` for the
/// filtering rules.
fn process_static(&self, s: &syn::ItemStatic, path: &Path, module_path: &str) {
    self.index_string_item(&s.vis, &s.ident, &s.expr, path, module_path);
}

/// Shared implementation behind `process_const` and `process_static`.
///
/// An item is inserted into the registry only when all of the following hold:
///
/// 1. its visibility does not map to `ConstantVisibility::Private`;
/// 2. the name filter does not classify its name as definitely benign;
/// 3. `extract_string_value` can decode its initializer to a string;
/// 4. that string is at most 1024 bytes long;
/// 5. the sum of the name filter's factor contributions is greater than -50.
///
/// `path` and `module_path` are stored on the resulting definition unchanged.
fn index_string_item(
    &self,
    vis: &syn::Visibility,
    ident: &syn::Ident,
    expr: &syn::Expr,
    path: &Path,
    module_path: &str,
) {
    /// Decoded values longer than this many bytes are not indexed.
    const MAX_VALUE_LEN: usize = 1024;
    /// Items whose preliminary score is not above this are not indexed.
    const SCORE_FLOOR: i32 = -50;

    let visibility = ConstantVisibility::from_syn(vis);
    if visibility == ConstantVisibility::Private {
        return;
    }

    let name = ident.to_string();
    if self.name_filter.is_definitely_benign(&name) {
        return;
    }

    let value = match Self::extract_string_value(expr) {
        Some(value) if value.len() <= MAX_VALUE_LEN => value,
        _ => return,
    };

    // The factor list is only needed for its total, so it is consumed within
    // this statement; `name` can then be moved into the definition below
    // without a clone.
    let preliminary_score: i32 = self
        .name_filter
        .analyze(&name)
        .iter()
        .map(|factor| factor.contribution)
        .sum();
    if preliminary_score <= SCORE_FLOOR {
        return;
    }

    self.registry.insert(ConstantDefinition {
        name,
        value,
        file: path.to_path_buf(),
        module_path: module_path.to_string(),
        // Line of the item's identifier as reported by its span.
        line: ident.span().start().line as u32,
        visibility,
        preliminary_score,
    });
}
/// Dispatches a single item found inside the module named by `module_path`.
///
/// `const` and `static` items go to their dedicated handlers. An inline
/// module (one with a body in this file) is descended into recursively with
/// its name appended to `module_path` using `::`. Modules without an inline
/// body and all other item kinds are ignored.
fn process_module_item(&self, item: &syn::Item, path: &Path, module_path: &str) {
    match item {
        syn::Item::Const(konst) => self.process_const(konst, path, module_path),
        syn::Item::Static(stat) => self.process_static(stat, path, module_path),
        syn::Item::Mod(module) => {
            // `mod foo;` declarations carry no content to walk.
            let children = match &module.content {
                Some((_, children)) => children,
                None => return,
            };
            // Avoid a leading `::` when the parent is the file root.
            let qualified = match module_path {
                "" => module.ident.to_string(),
                parent => format!("{}::{}", parent, module.ident),
            };
            for child in children {
                self.process_module_item(child, path, &qualified);
            }
        }
        _ => {}
    }
}
/// Attempts to statically decode an initializer expression into a string.
///
/// Recognized forms:
///
/// * a string literal (`"..."`);
/// * a byte-string literal (`b"..."`) whose bytes are valid UTF-8;
/// * a reference expression, decoded by `try_decode_referenced_value`;
/// * a macro invocation whose last path segment is `vec`, decoded by
///   `try_decode_vec_macro`;
/// * a call whose *callee* (rendered as tokens) contains `from_utf8`, in
///   which case the first argument is decoded recursively;
/// * any of the above wrapped in parentheses or an invisible group.
///
/// Returns `None` for every other expression or when decoding fails.
pub fn extract_string_value(expr: &syn::Expr) -> Option<String> {
    match expr {
        syn::Expr::Lit(lit) => match &lit.lit {
            syn::Lit::Str(s) => Some(s.value()),
            syn::Lit::ByteStr(s) => String::from_utf8(s.value()).ok(),
            _ => None,
        },
        syn::Expr::Reference(r) => Self::try_decode_referenced_value(&r.expr),
        syn::Expr::Macro(mac) => {
            let is_vec_macro = mac
                .mac
                .path
                .segments
                .last()
                .map_or(false, |segment| segment.ident == "vec");
            if is_vec_macro {
                Self::try_decode_vec_macro(&mac.mac.tokens)
            } else {
                None
            }
        }
        syn::Expr::Call(call) => {
            // `quote!` only interpolates plain identifiers: `#call.func` would
            // expand to the *entire* call expression followed by `. func`, so
            // the check below would also match `from_utf8` appearing anywhere
            // in the arguments. Bind the callee to a local first.
            let callee = &call.func;
            let func_name = quote::quote!(#callee).to_string();
            if func_name.contains("from_utf8") {
                call.args.first().and_then(Self::extract_string_value)
            } else {
                None
            }
        }
        syn::Expr::Group(g) => Self::extract_string_value(&g.expr),
        syn::Expr::Paren(p) => Self::extract_string_value(&p.expr),
        _ => None,
    }
}
/// Decodes the target of a reference expression when it is an array literal.
///
/// Two interpretations are tried in order:
///
/// 1. every element is an integer literal that fits in `u8` or a byte
///    literal, and the resulting bytes are valid UTF-8;
/// 2. every element is a `char` literal, concatenated in order.
///
/// Returns `None` when `expr` is not an array literal or neither
/// interpretation applies.
fn try_decode_referenced_value(expr: &syn::Expr) -> Option<String> {
    let elems = match expr {
        syn::Expr::Array(array) => &array.elems,
        _ => return None,
    };

    let as_byte = |elem: &syn::Expr| -> Option<u8> {
        match elem {
            syn::Expr::Lit(syn::ExprLit {
                lit: syn::Lit::Int(number),
                ..
            }) => number.base10_parse::<u8>().ok(),
            syn::Expr::Lit(syn::ExprLit {
                lit: syn::Lit::Byte(byte),
                ..
            }) => Some(byte.value()),
            _ => None,
        }
    };
    let as_char = |elem: &syn::Expr| -> Option<char> {
        match elem {
            syn::Expr::Lit(syn::ExprLit {
                lit: syn::Lit::Char(ch),
                ..
            }) => Some(ch.value()),
            _ => None,
        }
    };

    // Byte interpretation first; fall back to chars only if it fails at
    // either the element level or the UTF-8 validation step.
    elems
        .iter()
        .map(as_byte)
        .collect::<Option<Vec<u8>>>()
        .and_then(|raw| String::from_utf8(raw).ok())
        .or_else(|| elems.iter().map(as_char).collect::<Option<String>>())
}
fn try_decode_vec_macro(tokens: &proc_macro2::TokenStream) -> Option<String> {
use syn::parse::Parser;
use syn::{Expr, ExprLit, Lit};
let parser = syn::punctuated::Punctuated::<Expr, syn::Token![,]>::parse_terminated;
let exprs: syn::punctuated::Punctuated<Expr, syn::Token![,]> =
match parser.parse2(tokens.clone()) {
Ok(exprs) => exprs,
Err(_) => return None,
};
let bytes: Option<Vec<u8>> = exprs
.iter()
.map(|expr| {
if let Expr::Lit(ExprLit {
lit: Lit::Int(int), ..
}) = expr
{
int.base10_parse::<u8>().ok()
} else if let Expr::Lit(ExprLit {
lit: Lit::Byte(b), ..
}) = expr
{
Some(b.value())
} else {
None
}
})
.collect();
bytes.and_then(|b| String::from_utf8(b).ok())
}
}
impl Default for Indexer {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor on an owned expression.
    fn extracted(expr: syn::Expr) -> Option<String> {
        Indexer::extract_string_value(&expr)
    }

    /// A plain string literal is returned verbatim.
    #[test]
    fn test_extract_string_literal() {
        let decoded = extracted(syn::parse_quote!("hello"));
        assert_eq!(decoded.as_deref(), Some("hello"));
    }

    /// A UTF-8 byte-string literal is decoded to its text.
    #[test]
    fn test_extract_byte_string() {
        let decoded = extracted(syn::parse_quote!(b"hello"));
        assert_eq!(decoded.as_deref(), Some("hello"));
    }

    /// A referenced array of byte values is decoded to its text.
    #[test]
    fn test_extract_byte_array() {
        let decoded = extracted(syn::parse_quote!(&[104, 101, 108, 108, 111]));
        assert_eq!(decoded.as_deref(), Some("hello"));
    }
}