use log::{debug, warn};
use ouroboros::self_referencing;
use std::{path::Path, sync::Arc};
use crate::{
cilassembly::CilAssembly,
file::File,
metadata::{
cor20header::Cor20Header,
diagnostics::{DiagnosticCategory, Diagnostics},
identity::{AssemblyIdentity, AssemblyVersion, Identity, ProcessorArchitecture},
root::Root,
streams::{Blob, Guid, StreamHeader, Strings, TablesHeader, UserStrings},
tables::{AssemblyProcessorRaw, AssemblyRaw, AssemblyRefRaw, ModuleRaw},
validation::ValidationEngine,
},
Error, Result, ValidationConfig,
};
/// Parsed metadata of a .NET assembly, borrowing from the raw bytes of the backing file.
///
/// Instances are produced by [`CilAssemblyViewData::from_dotscope_file`]. Every stream field is
/// `None` when the metadata root lists no stream with the matching name.
pub struct CilAssemblyViewData<'a> {
/// Shared handle to the file whose CLR directory and RVA mapping were used for parsing.
pub file: Arc<File>,
/// Raw bytes the CLR header and metadata were read from; the stream views are sub-slices of it.
pub data: &'a [u8],
/// CLR header read from the range reported by `File::clr`.
pub cor20header: Cor20Header,
/// Metadata root read from the range the CLR header points to.
pub metadata_root: Root,
/// First `#~` or `#-` stream, if any.
pub metadata_tables: Option<TablesHeader<'a>>,
/// First `#Strings` stream, if any.
pub strings: Option<Strings<'a>>,
/// First `#US` stream, if any.
pub userstrings: Option<UserStrings<'a>>,
/// First `#GUID` stream, if any.
pub guids: Option<Guid<'a>>,
/// First `#Blob` stream, if any.
pub blobs: Option<Blob<'a>>,
/// Collector that receives a warning for each duplicate stream and an info entry for each
/// unknown stream seen while parsing.
pub diagnostics: Arc<Diagnostics>,
}
impl<'a> CilAssemblyViewData<'a> {
pub fn from_dotscope_file(file: Arc<File>, data: &'a [u8]) -> Result<Self> {
let (clr_rva, clr_size) = file.clr().ok_or(Error::NotSupported)?;
if clr_rva == 0 || clr_size == 0 {
return Err(Error::NotSupported);
}
let clr_offset = file.rva_to_offset(clr_rva)?;
let clr_end = clr_offset
.checked_add(clr_size)
.ok_or(out_of_bounds_error!())?;
if clr_size > data.len() || clr_offset > data.len() || clr_end > data.len() {
return Err(out_of_bounds_error!());
}
let cor20_header = Cor20Header::read(&data[clr_offset..clr_end])?;
debug!(
"PE header: CLR {}.{}, metadata at RVA 0x{:X}",
cor20_header.major_runtime_version,
cor20_header.minor_runtime_version,
cor20_header.meta_data_rva
);
let metadata_offset = file.rva_to_offset(cor20_header.meta_data_rva as usize)?;
let metadata_end = metadata_offset
.checked_add(cor20_header.meta_data_size as usize)
.ok_or(out_of_bounds_error!())?;
if metadata_end > data.len() {
return Err(out_of_bounds_error!());
}
let metadata_slice = &data[metadata_offset..metadata_end];
let metadata_root = Root::read(metadata_slice)?;
let stream_names: Vec<&str> = metadata_root
.stream_headers
.iter()
.map(|s| s.name.as_str())
.collect();
debug!("Metadata streams: {}", stream_names.join(", "));
let diagnostics = Arc::new(Diagnostics::new());
let mut metadata_tables = None;
let mut strings_heap = None;
let mut userstrings_heap = None;
let mut guid_heap = None;
let mut blob_heap = None;
let mut seen_tables = false;
let mut seen_strings = false;
let mut seen_userstrings = false;
let mut seen_guid = false;
let mut seen_blob = false;
for stream in &metadata_root.stream_headers {
let stream_offset = stream.offset as usize;
let stream_size = stream.size as usize;
let stream_end = stream_offset
.checked_add(stream_size)
.ok_or(out_of_bounds_error!())?;
if stream_end > metadata_slice.len() {
return Err(out_of_bounds_error!());
}
let stream_data = &metadata_slice[stream_offset..stream_end];
match stream.name.as_str() {
"#~" | "#-" => {
if seen_tables {
warn!(
"Duplicate metadata stream '{}' detected, using first occurrence",
stream.name
);
diagnostics.warning(
DiagnosticCategory::Heap,
format!(
"Duplicate metadata tables stream '{}' found, using first occurrence",
stream.name
),
);
} else {
metadata_tables = Some(TablesHeader::from(stream_data)?);
seen_tables = true;
}
}
"#Strings" => {
if seen_strings {
warn!(
"Duplicate metadata stream '#Strings' detected, using first occurrence"
);
diagnostics.warning(
DiagnosticCategory::Heap,
"Duplicate #Strings heap found, using first occurrence",
);
} else {
strings_heap = Some(Strings::from(stream_data)?);
seen_strings = true;
}
}
"#US" => {
if seen_userstrings {
warn!("Duplicate metadata stream '#US' detected, using first occurrence");
diagnostics.warning(
DiagnosticCategory::Heap,
"Duplicate #US heap found, using first occurrence",
);
} else {
userstrings_heap = Some(UserStrings::from(stream_data)?);
seen_userstrings = true;
}
}
"#GUID" => {
if seen_guid {
warn!("Duplicate metadata stream '#GUID' detected, using first occurrence");
diagnostics.warning(
DiagnosticCategory::Heap,
"Duplicate #GUID heap found, using first occurrence",
);
} else {
guid_heap = Some(Guid::from(stream_data)?);
seen_guid = true;
}
}
"#Blob" => {
if seen_blob {
warn!("Duplicate metadata stream '#Blob' detected, using first occurrence");
diagnostics.warning(
DiagnosticCategory::Heap,
"Duplicate #Blob heap found, using first occurrence",
);
} else {
blob_heap = Some(Blob::from(stream_data)?);
seen_blob = true;
}
}
_ => {
diagnostics.info(
DiagnosticCategory::Heap,
format!("Unknown metadata stream '{}' encountered", stream.name),
);
}
}
}
Ok(CilAssemblyViewData {
file,
data,
cor20header: cor20_header,
metadata_root,
metadata_tables,
strings: strings_heap,
userstrings: userstrings_heap,
guids: guid_heap,
blobs: blob_heap,
diagnostics,
})
}
}
/// Self-referencing container that owns the backing [`File`] together with the
/// [`CilAssemblyViewData`] parsed from it.
///
/// The `#[self_referencing]` macro (ouroboros) generates the constructor and the
/// `borrow_file` / `with_data` accessors used by the methods of this type.
#[self_referencing]
pub struct CilAssemblyView {
/// Owner of the bytes that `data` borrows from.
file: Arc<File>,
/// Parsed metadata borrowing from `file`.
#[borrows(file)]
#[not_covariant]
data: CilAssemblyViewData<'this>,
}
impl CilAssemblyView {
    /// Loads an assembly view from the file at `path` using [`ValidationConfig::production`].
    ///
    /// # Errors
    ///
    /// See [`Self::from_path_with_validation`].
    pub fn from_path(path: impl AsRef<Path>) -> Result<Self> {
        let config = ValidationConfig::production();
        Self::from_path_with_validation(path, config)
    }

    /// Loads an assembly view from the file at `path` with an explicit validation configuration.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be loaded, its metadata cannot be parsed, or the requested
    /// validation fails.
    pub fn from_path_with_validation(
        path: impl AsRef<Path>,
        validation_config: ValidationConfig,
    ) -> Result<Self> {
        let backing = File::from_path(path)?;
        Self::load_with_validation(Arc::new(backing), validation_config)
    }

    /// Loads an assembly view from an owned byte buffer using [`ValidationConfig::production`].
    ///
    /// # Errors
    ///
    /// See [`Self::from_mem_with_validation`].
    pub fn from_mem(data: Vec<u8>) -> Result<Self> {
        let config = ValidationConfig::production();
        Self::from_mem_with_validation(data, config)
    }

    /// Loads an assembly view from an owned byte buffer with an explicit validation
    /// configuration.
    ///
    /// # Errors
    ///
    /// Fails if the buffer cannot be loaded as a file, its metadata cannot be parsed, or the
    /// requested validation fails.
    pub fn from_mem_with_validation(
        data: Vec<u8>,
        validation_config: ValidationConfig,
    ) -> Result<Self> {
        let backing = File::from_mem(data)?;
        Self::load_with_validation(Arc::new(backing), validation_config)
    }

    /// Builds an assembly view from an already loaded [`File`] using
    /// [`ValidationConfig::production`].
    ///
    /// # Errors
    ///
    /// See [`Self::from_dotscope_file_with_validation`].
    pub fn from_dotscope_file(file: File) -> Result<Self> {
        let config = ValidationConfig::production();
        Self::from_dotscope_file_with_validation(file, config)
    }

    /// Builds an assembly view from an already loaded [`File`] with an explicit validation
    /// configuration.
    ///
    /// # Errors
    ///
    /// Fails if the metadata cannot be parsed or the requested validation fails.
    pub fn from_dotscope_file_with_validation(
        file: File,
        validation_config: ValidationConfig,
    ) -> Result<Self> {
        Self::load_with_validation(Arc::new(file), validation_config)
    }

    /// Builds an assembly view from an opened [`std::fs::File`] using
    /// [`ValidationConfig::production`].
    ///
    /// # Errors
    ///
    /// See [`Self::from_std_file_with_validation`].
    pub fn from_std_file(file: std::fs::File) -> Result<Self> {
        let config = ValidationConfig::production();
        Self::from_std_file_with_validation(file, config)
    }

    /// Builds an assembly view from an opened [`std::fs::File`] with an explicit validation
    /// configuration.
    ///
    /// # Errors
    ///
    /// Fails if the handle cannot be loaded as a file, its metadata cannot be parsed, or the
    /// requested validation fails.
    pub fn from_std_file_with_validation(
        file: std::fs::File,
        validation_config: ValidationConfig,
    ) -> Result<Self> {
        Self::from_dotscope_file_with_validation(File::from_std_file(file)?, validation_config)
    }

    /// Builds an assembly view from any reader using [`ValidationConfig::production`].
    ///
    /// # Errors
    ///
    /// See [`Self::from_reader_with_validation`].
    pub fn from_reader<R: std::io::Read>(reader: R) -> Result<Self> {
        let config = ValidationConfig::production();
        Self::from_reader_with_validation(reader, config)
    }

    /// Builds an assembly view from any reader with an explicit validation configuration.
    ///
    /// # Errors
    ///
    /// Fails if the reader's content cannot be loaded as a file, its metadata cannot be parsed,
    /// or the requested validation fails.
    pub fn from_reader_with_validation<R: std::io::Read>(
        reader: R,
        validation_config: ValidationConfig,
    ) -> Result<Self> {
        Self::from_dotscope_file_with_validation(File::from_reader(reader)?, validation_config)
    }

    /// Shared tail of every constructor: parses the metadata out of `file` and, when the
    /// configuration asks for raw validation, validates the freshly built view.
    fn load_with_validation(file: Arc<File>, validation_config: ValidationConfig) -> Result<Self> {
        let view = Self::try_new(file, |backing| {
            CilAssemblyViewData::from_dotscope_file(Arc::clone(backing), backing.data())
        })?;

        if !validation_config.should_validate_raw() {
            return Ok(view);
        }
        view.validate(validation_config)?;
        Ok(view)
    }

    /// Returns the parsed CLR header.
    #[must_use]
    pub fn cor20header(&self) -> &Cor20Header {
        self.with_data(|parsed| &parsed.cor20header)
    }

    /// Returns the parsed metadata root.
    #[must_use]
    pub fn metadata_root(&self) -> &Root {
        self.with_data(|parsed| &parsed.metadata_root)
    }

    /// Returns the metadata tables stream, or `None` if the assembly has none.
    #[must_use]
    pub fn tables(&self) -> Option<&TablesHeader<'_>> {
        self.with_data(|parsed| parsed.metadata_tables.as_ref())
    }

    /// Returns the `#Strings` heap, or `None` if the assembly has none.
    #[must_use]
    pub fn strings(&self) -> Option<&Strings<'_>> {
        self.with_data(|parsed| parsed.strings.as_ref())
    }

    /// Returns the `#US` heap, or `None` if the assembly has none.
    #[must_use]
    pub fn userstrings(&self) -> Option<&UserStrings<'_>> {
        self.with_data(|parsed| parsed.userstrings.as_ref())
    }

    /// Returns the `#GUID` heap, or `None` if the assembly has none.
    #[must_use]
    pub fn guids(&self) -> Option<&Guid<'_>> {
        self.with_data(|parsed| parsed.guids.as_ref())
    }

    /// Returns the `#Blob` heap, or `None` if the assembly has none.
    #[must_use]
    pub fn blobs(&self) -> Option<&Blob<'_>> {
        self.with_data(|parsed| parsed.blobs.as_ref())
    }

    /// Returns the stream headers listed in the metadata root.
    #[must_use]
    pub fn streams(&self) -> &[StreamHeader] {
        self.with_data(|parsed| &parsed.metadata_root.stream_headers)
    }

    /// Returns the shared handle to the backing file.
    #[must_use]
    pub fn file(&self) -> &Arc<File> {
        self.borrow_file()
    }

    /// Returns the raw bytes the metadata was parsed from.
    #[must_use]
    pub fn data(&self) -> &[u8] {
        self.with_data(|parsed| parsed.data)
    }

    /// Returns the diagnostics collected while parsing.
    #[must_use]
    pub fn diagnostics(&self) -> &Arc<Diagnostics> {
        self.with_data(|parsed| &parsed.diagnostics)
    }

    /// Consumes the view and wraps it in a [`CilAssembly`].
    #[must_use]
    pub fn to_owned(self) -> CilAssembly {
        CilAssembly::new(self)
    }

    /// Runs stage-1 validation of this view under `config`.
    ///
    /// Returns `Ok(())` without doing any work when `config` equals
    /// [`ValidationConfig::disabled`].
    ///
    /// # Errors
    ///
    /// Fails if the validation engine cannot be created, the stage cannot be executed, or the
    /// validation outcome converts into an error.
    pub fn validate(&self, config: ValidationConfig) -> Result<()> {
        if config == ValidationConfig::disabled() {
            return Ok(());
        }

        let validator = ValidationEngine::new(self, config)?;
        let outcome = validator.execute_stage1_validation(self, None)?;
        outcome.into_result()
    }

    /// Lists the identities of all assemblies referenced through the `AssemblyRef` table.
    ///
    /// Returns an empty list when the tables stream, the `#Strings` heap, the `#Blob` heap or
    /// the `AssemblyRef` table is missing. Rows that cannot be resolved are skipped.
    #[must_use]
    pub fn dependencies(&self) -> Vec<AssemblyIdentity> {
        let Some(tables) = self.tables() else {
            return Vec::new();
        };
        let Some(strings) = self.strings() else {
            return Vec::new();
        };
        let Some(blobs) = self.blobs() else {
            return Vec::new();
        };
        let Some(assembly_ref_table) = tables.table::<AssemblyRefRaw>() else {
            return Vec::new();
        };

        assembly_ref_table
            .into_iter()
            .filter_map(|row| {
                let resolved = row.to_owned(strings, blobs).ok()?;
                Some(AssemblyIdentity::from_assembly_ref(&resolved))
            })
            .collect()
    }

    /// Reads the assembly identity from the first row of the `Assembly` table.
    ///
    /// Returns `Ok(None)` when there is no `Assembly` table or it has no rows. A culture index
    /// of zero yields no culture and a public-key index of zero yields no strong name. The
    /// processor architecture comes from the first `AssemblyProcessor` row, if that row exists
    /// and its value converts.
    ///
    /// # Errors
    ///
    /// Fails if the tables stream or the `#Strings` heap is missing, if a referenced string or
    /// blob cannot be read, or if a public key is referenced while there is no `#Blob` heap.
    pub fn identity(&self) -> Result<Option<AssemblyIdentity>> {
        let (tables, strings) = match (self.tables(), self.strings()) {
            (Some(tables), Some(strings)) => (tables, strings),
            _ => {
                return Err(malformed_error!(
                    "Metadata tables or strings heap are missing"
                ))
            }
        };

        let Some(assembly_table) = tables.table::<AssemblyRaw>() else {
            return Ok(None);
        };
        let Some(assembly_row) = assembly_table.iter().next() else {
            return Ok(None);
        };

        let name = strings.get(assembly_row.name as usize)?.to_string();

        let culture = match assembly_row.culture {
            0 => None,
            index => Some(strings.get(index as usize)?.to_string()),
        };

        let strong_name = match (assembly_row.public_key, self.blobs()) {
            (0, _) => None,
            (index, Some(blobs)) => Some(Identity::PubKey(blobs.get(index as usize)?.to_vec())),
            (_, None) => {
                return Err(malformed_error!(
                    "Assembly has public key reference but no blob heap"
                ))
            }
        };

        let first_processor = tables
            .table::<AssemblyProcessorRaw>()
            .and_then(|proc_table| proc_table.iter().next());
        let processor_architecture = match first_processor {
            Some(proc) => ProcessorArchitecture::try_from(proc.processor).ok(),
            None => None,
        };

        #[allow(clippy::cast_possible_truncation)]
        let version = AssemblyVersion {
            major: assembly_row.major_version as u16,
            minor: assembly_row.minor_version as u16,
            build: assembly_row.build_number as u16,
            revision: assembly_row.revision_number as u16,
        };

        Ok(Some(AssemblyIdentity {
            name,
            version,
            culture,
            strong_name,
            processor_architecture,
        }))
    }

    /// Returns the name stored in the first row of the `Module` table.
    ///
    /// Yields `None` when the tables stream, the `#Strings` heap, the `Module` table or its
    /// first row is missing, or when the name cannot be read from the heap.
    pub fn module_name(&self) -> Option<String> {
        let tables = self.tables()?;
        let strings = self.strings()?;
        let module_table = tables.table::<ModuleRaw>()?;
        let first_module = module_table.iter().next()?;

        match strings.get(first_module.name as usize) {
            Ok(name) => Some(String::from(name)),
            Err(_) => None,
        }
    }

    /// Reports whether this file has a `Module` table but no row in the `Assembly` table.
    ///
    /// Returns `false` when there is no tables stream at all.
    #[must_use]
    pub fn is_netmodule(&self) -> bool {
        match self.tables() {
            None => false,
            Some(tables) => {
                let has_module = tables.table::<ModuleRaw>().is_some();
                let has_assembly = match tables.table::<AssemblyRaw>() {
                    Some(assembly_table) => assembly_table.iter().next().is_some(),
                    None => false,
                };
                has_module && !has_assembly
            }
        }
    }

    /// Returns the assembly identity, falling back to one derived from the module name.
    ///
    /// The fallback uses the module name with everything from its last `.` onwards removed
    /// (the full name when it contains no `.`), version `0.0.0.0`, and no culture, strong name
    /// or processor architecture.
    ///
    /// # Errors
    ///
    /// Propagates errors from [`Self::identity`] and fails if neither an identity nor a module
    /// name is available.
    pub fn identity_or_fallback(&self) -> Result<AssemblyIdentity> {
        match self.identity()? {
            Some(identity) => Ok(identity),
            None => {
                let module_name = self.module_name().ok_or_else(|| {
                    malformed_error!("Neither Assembly nor Module table available for identity")
                })?;

                let name = match module_name.rsplit_once('.') {
                    Some((stem, _)) => stem.to_string(),
                    None => module_name,
                };

                let version = AssemblyVersion {
                    major: 0,
                    minor: 0,
                    build: 0,
                    revision: 0,
                };

                Ok(AssemblyIdentity {
                    name,
                    version,
                    culture: None,
                    strong_name: None,
                    processor_architecture: None,
                })
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test::factories::metadata::cilassemblyview::verify_assembly_view_complete;
    use std::{fs, path::PathBuf};

    /// Location of the `WindowsBase.dll` sample inside the crate's test data.
    fn windows_base_sample() -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/samples/WindowsBase.dll")
    }

    /// A view loaded from disk exposes complete metadata.
    #[test]
    fn from_file() {
        let sample = windows_base_sample();
        let view = CilAssemblyView::from_path(&sample).unwrap();
        verify_assembly_view_complete(&view);
    }

    /// A view loaded from memory exposes the same bytes it was given and complete metadata.
    #[test]
    fn from_buffer() {
        let bytes = fs::read(windows_base_sample()).unwrap();
        let view = CilAssemblyView::from_mem(bytes.clone()).unwrap();

        assert_eq!(view.data(), bytes.as_slice());
        verify_assembly_view_complete(&view);
    }

    /// Missing files, garbage bytes and empty input are all rejected.
    #[test]
    fn test_error_handling() {
        let missing = CilAssemblyView::from_path(Path::new("non_existent_file.dll"));
        assert!(missing.is_err());

        let garbage = CilAssemblyView::from_mem(vec![0u8; 100]);
        assert!(garbage.is_err());

        let empty = CilAssemblyView::from_mem(Vec::new());
        assert!(empty.is_err());
    }
}