#![allow(clippy::cast_possible_truncation)]
use log::warn;
// Entry tag values, compared against `SignatureEntry::tag`.
// NOTE(review): the numeric values presumably mirror the Scala compiler's
// pickle format tags — confirm against the compiler sources.

/// Name entry tag; `ScalaSignatureReader::read_name` accepts entries with this tag.
pub const TAG_TERM_NAME: u8 = 1;
/// Name entry tag; `ScalaSignatureReader::read_name` accepts entries with this tag.
pub const TAG_TYPE_NAME: u8 = 2;
/// `ScalaSignatureReader::resolve_qualified_name` stops its owner walk at an
/// entry with this tag.
pub const TAG_NONE_SYM: u8 = 3;
/// Declared for callers; not referenced by the reader code visible here.
pub const TAG_TYPE_SYM: u8 = 4;
/// Declared for callers; not referenced by the reader code visible here.
pub const TAG_ALIAS_SYM: u8 = 5;
/// Symbol entry tag; `ScalaSignatureReader::read_symbol_info` accepts it.
pub const TAG_CLASS_SYM: u8 = 6;
/// Symbol entry tag; `ScalaSignatureReader::read_symbol_info` accepts it.
pub const TAG_MODULE_SYM: u8 = 7;
/// Declared for callers; not referenced by the reader code visible here.
pub const TAG_VAL_SYM: u8 = 8;
/// External reference tag; accepted by `ScalaSignatureReader::read_ext_ref_owner`.
pub const TAG_EXT_REF: u8 = 9;
/// External reference tag; accepted by `ScalaSignatureReader::read_ext_ref_owner`.
pub const TAG_EXT_MOD_CLASS_REF: u8 = 10;
// Single-bit masks intended to be AND-ed with `ScalaSymbolInfo::flags`
// (e.g. `info.flags & FLAG_CASE != 0`).
// NOTE(review): the bit positions are taken as given here; verify them against
// the flag layout the Scala compiler actually writes into signatures.
pub const FLAG_PRIVATE: u64 = 1 << 2;
pub const FLAG_PROTECTED: u64 = 1 << 3;
pub const FLAG_SEALED: u64 = 1 << 5;
pub const FLAG_CASE: u64 = 1 << 7;
pub const FLAG_ABSTRACT: u64 = 1 << 8;
pub const FLAG_MODULE: u64 = 1 << 11;
pub const FLAG_INTERFACE: u64 = 1 << 13;
// Bit 36 does not fit in 32 bits, which is why the masks are `u64`.
pub const FLAG_TRAIT: u64 = 1 << 36;
/// One raw entry of the signature table: a tag byte plus its undecoded payload.
#[derive(Debug, Clone)]
pub struct SignatureEntry {
/// Tag byte that precedes the entry's length; compare against the `TAG_*` constants.
pub tag: u8,
/// Payload bytes, copied out of the input; exactly as long as the entry's
/// declared length.
pub data: Vec<u8>,
}
/// Fields decoded from the payload of a class or module symbol entry.
#[derive(Debug, Clone)]
pub struct ScalaSymbolInfo {
/// First natural number of the payload; used as an entry index for the symbol's name.
pub name_index: usize,
/// Second natural number of the payload; used as an entry index for the owner.
pub owner_index: usize,
/// Third natural number of the payload; test with the `FLAG_*` masks.
pub flags: u64,
/// Last natural number found after `flags`, or `0` when the payload ends
/// right after `flags`.
pub info_index: usize,
}
/// Parsed signature table; built with `ScalaSignatureReader::parse`.
#[derive(Debug)]
pub struct ScalaSignatureReader {
// Entries in the order they appear in the input; an "index" anywhere in this
// API is a position in this vector.
entries: Vec<SignatureEntry>,
}
impl ScalaSignatureReader {
#[must_use]
pub fn parse(bytes: &[u8]) -> Option<Self> {
if bytes.len() < 2 {
warn!("scala signature too short ({} bytes)", bytes.len());
return None;
}
let major = bytes[0];
let minor = bytes[1];
if major != 5 {
warn!("unsupported Scala signature version {major}.{minor} (expected 5.x)");
return None;
}
let mut pos = 2;
let entry_count = read_nat(bytes, &mut pos)? as usize;
let mut entries = Vec::with_capacity(entry_count);
for _ in 0..entry_count {
let entry = read_entry(bytes, &mut pos)?;
entries.push(entry);
}
Some(Self { entries })
}
#[must_use]
pub fn entry_count(&self) -> usize {
self.entries.len()
}
#[must_use]
pub fn entry(&self, index: usize) -> Option<&SignatureEntry> {
self.entries.get(index)
}
#[must_use]
pub fn read_name(&self, index: usize) -> Option<String> {
let entry = self.entry(index)?;
if entry.tag != TAG_TERM_NAME && entry.tag != TAG_TYPE_NAME {
return None;
}
String::from_utf8(entry.data.clone()).ok()
}
#[must_use]
pub fn read_symbol_info(&self, entry: &SignatureEntry) -> Option<ScalaSymbolInfo> {
if entry.tag != TAG_CLASS_SYM && entry.tag != TAG_MODULE_SYM {
return None;
}
parse_symbol_info(&entry.data)
}
#[must_use]
#[allow(clippy::items_after_statements)] #[allow(clippy::match_same_arms)] #[allow(clippy::manual_let_else)] pub fn resolve_qualified_name(&self, sym_index: usize) -> Option<String> {
let entry = self.entry(sym_index)?;
let info = self.read_symbol_info(entry)?;
let name = self.read_name(info.name_index)?;
let mut segments = vec![name];
let mut current_owner = info.owner_index;
const MAX_DEPTH: usize = 128;
for _ in 0..MAX_DEPTH {
let owner_entry = match self.entry(current_owner) {
Some(e) => e,
None => break,
};
match owner_entry.tag {
TAG_CLASS_SYM | TAG_MODULE_SYM => {
if let Some(owner_info) = self.read_symbol_info(owner_entry) {
if let Some(owner_name) = self.read_name(owner_info.name_index) {
segments.push(owner_name);
current_owner = owner_info.owner_index;
} else {
break;
}
} else {
break;
}
}
TAG_EXT_REF | TAG_EXT_MOD_CLASS_REF => {
if let Some(ext_name) = self.read_ext_ref_name(owner_entry) {
if ext_name != "<empty>" {
segments.push(ext_name);
}
}
break;
}
TAG_NONE_SYM => break,
_ => break,
}
}
segments.reverse();
Some(segments.join("."))
}
#[must_use]
fn read_ext_ref_name(&self, entry: &SignatureEntry) -> Option<String> {
if entry.tag != TAG_EXT_REF && entry.tag != TAG_EXT_MOD_CLASS_REF {
return None;
}
let mut pos = 0;
let name_index = read_nat(&entry.data, &mut pos)? as usize;
self.read_name(name_index)
}
#[must_use]
pub fn read_ext_ref_owner(&self, entry: &SignatureEntry) -> Option<usize> {
if entry.tag != TAG_EXT_REF && entry.tag != TAG_EXT_MOD_CLASS_REF {
return None;
}
let mut pos = 0;
let _name_index = read_nat(&entry.data, &mut pos)?;
if pos < entry.data.len() {
Some(read_nat(&entry.data, &mut pos)? as usize)
} else {
None
}
}
#[must_use]
pub fn class_and_module_symbols(&self) -> Vec<(usize, &SignatureEntry)> {
self.entries
.iter()
.enumerate()
.filter(|(_, e)| e.tag == TAG_CLASS_SYM || e.tag == TAG_MODULE_SYM)
.collect()
}
#[must_use]
pub fn ext_refs(&self) -> Vec<(usize, &SignatureEntry)> {
self.entries
.iter()
.enumerate()
.filter(|(_, e)| e.tag == TAG_EXT_REF || e.tag == TAG_EXT_MOD_CLASS_REF)
.collect()
}
}
/// Decodes a variable-length natural number from `data`, starting at `*pos`.
///
/// Each byte contributes its low 7 bits; bit `0x80` marks that another byte
/// follows. Groups are combined least-significant first. `*pos` is advanced
/// past every byte consumed, including on a failed decode.
///
/// Returns `None` when the input ends before a terminating byte, when the
/// encoding runs past ten bytes, or when a group carries bits that do not fit
/// in a `u64`.
///
/// NOTE(review): the Scala compiler's `PickleBuffer` describes its Nat
/// encoding as big-endian base 128, which is the opposite group order to the
/// one implemented here — confirm against real signatures before relying on
/// multi-byte values.
pub fn read_nat(data: &[u8], pos: &mut usize) -> Option<u64> {
    let mut result: u64 = 0;
    let mut shift: u32 = 0;
    loop {
        let byte = *data.get(*pos)?;
        *pos += 1;

        let group = u64::from(byte & 0x7F);
        // `shift` never exceeds 63 here. A plain or "checked" shift would
        // silently drop the high bits of the tenth group, so reject any group
        // that does not fit above the bits already decoded.
        if group > (u64::MAX >> shift) {
            return None;
        }
        // Groups occupy disjoint bit ranges, so OR-ing is the same as adding.
        result |= group << shift;
        shift += 7;

        if byte & 0x80 == 0 {
            return Some(result);
        }
        if shift > 63 {
            // A continuation after ten bytes cannot belong to a 64-bit value.
            return None;
        }
    }
}
/// Reads a natural number from `data` at `*pos`.
///
/// This is a plain delegate to `read_nat`: same decoding, same `None` cases,
/// same advancement of `*pos`.
pub fn read_long_nat(data: &[u8], pos: &mut usize) -> Option<u64> {
read_nat(data, pos)
}
/// Reads one table entry from `data` at `*pos`: a tag byte, a natural-number
/// payload length, then that many payload bytes.
///
/// On success `*pos` is left just past the payload. Returns `None` when the
/// input ends before the tag, the length is malformed, or the declared length
/// exceeds the bytes that remain.
fn read_entry(data: &[u8], pos: &mut usize) -> Option<SignatureEntry> {
    let tag = *data.get(*pos)?;
    *pos += 1;

    let declared_len = read_nat(data, pos)?;
    // Compare in `u64` before narrowing: the length is untrusted, and adding
    // it to `*pos` first could overflow `usize` (or truncate on 32-bit
    // targets) and slip past the bounds check.
    let remaining = data.len().saturating_sub(*pos);
    if declared_len > remaining as u64 {
        return None;
    }
    // Cannot overflow: `declared_len <= remaining <= data.len() - *pos`.
    let end = *pos + declared_len as usize;

    let payload = data.get(*pos..end)?.to_vec();
    *pos = end;
    Some(SignatureEntry { tag, data: payload })
}
/// Decodes a symbol payload: name index, owner index and flags, followed by
/// zero or more further natural numbers.
///
/// Only the last of the trailing numbers is kept, as `info_index`; it is `0`
/// when there are none. A malformed trailing number ends the scan without
/// failing the whole decode. Returns `None` when any of the three leading
/// numbers cannot be read.
fn parse_symbol_info(data: &[u8]) -> Option<ScalaSymbolInfo> {
    let mut cursor = 0;
    let name_index = read_nat(data, &mut cursor)? as usize;
    let owner_index = read_nat(data, &mut cursor)? as usize;
    let flags = read_long_nat(data, &mut cursor)?;

    // Each successfully decoded trailing number overwrites the previous one,
    // so the value left at the end is the last one in the payload.
    let mut info_index = 0;
    while cursor < data.len() {
        match read_nat(data, &mut cursor) {
            Some(trailing) => info_index = trailing as usize,
            None => break,
        }
    }

    Some(ScalaSymbolInfo {
        name_index,
        owner_index,
        flags,
        info_index,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `value` in the byte layout `read_nat` decodes: 7-bit groups,
    /// lowest group first, with `0x80` set on every byte but the last.
    fn encode_nat(mut value: u64) -> Vec<u8> {
        let mut out = Vec::new();
        while value > 0x7F {
            out.push((value & 0x7F) as u8 | 0x80);
            value >>= 7;
        }
        out.push(value as u8);
        out
    }

    /// Serialises one entry: tag byte, encoded payload length, payload.
    fn build_entry(tag: u8, data: &[u8]) -> Vec<u8> {
        let mut out = vec![tag];
        out.extend(encode_nat(data.len() as u64));
        out.extend_from_slice(data);
        out
    }

    /// Serialises a version 5.0 signature holding the given entries.
    fn build_signature(entries: Vec<Vec<u8>>) -> Vec<u8> {
        let mut buf = vec![5, 0];
        buf.extend(encode_nat(entries.len() as u64));
        buf.extend(entries.into_iter().flatten());
        buf
    }

    /// Builds a signature from `entries` and parses it, panicking on failure.
    fn reader_for(entries: Vec<Vec<u8>>) -> ScalaSignatureReader {
        ScalaSignatureReader::parse(&build_signature(entries)).unwrap()
    }

    /// Payload of a symbol entry: name index, owner index, flags, info index.
    fn symbol_payload(name: u64, owner: u64, flags: u64, info: u64) -> Vec<u8> {
        [name, owner, flags, info]
            .iter()
            .flat_map(|&nat| encode_nat(nat))
            .collect()
    }

    /// Runs `read_nat` from offset 0 and reports the value and final offset.
    fn decode(bytes: &[u8]) -> (Option<u64>, usize) {
        let mut pos = 0;
        let value = read_nat(bytes, &mut pos);
        (value, pos)
    }

    #[test]
    fn nat_single_byte() {
        assert_eq!(decode(&[42]), (Some(42), 1));
    }

    #[test]
    fn nat_zero() {
        assert_eq!(decode(&[0]), (Some(0), 1));
    }

    #[test]
    fn nat_max_single_byte() {
        assert_eq!(decode(&[127]), (Some(127), 1));
    }

    #[test]
    fn nat_two_bytes() {
        assert_eq!(decode(&[0x80, 0x01]), (Some(128), 2));
    }

    #[test]
    fn nat_multi_byte_300() {
        assert_eq!(decode(&[0xAC, 0x02]), (Some(300), 2));
    }

    #[test]
    fn nat_round_trip() {
        for value in [0, 1, 127, 128, 255, 300, 16383, 16384, 65535, 1_000_000] {
            let encoded = encode_nat(value);
            let (decoded, consumed) = decode(&encoded);
            assert_eq!(decoded, Some(value), "round-trip failed for {value}");
            assert_eq!(consumed, encoded.len());
        }
    }

    #[test]
    fn nat_truncated_returns_none() {
        // Continuation bit set, but no following byte.
        assert_eq!(decode(&[0x80]).0, None);
    }

    #[test]
    fn nat_empty_returns_none() {
        assert_eq!(decode(&[]).0, None);
    }

    #[test]
    fn parse_empty_signature() {
        assert_eq!(reader_for(vec![]).entry_count(), 0);
    }

    #[test]
    fn parse_name_entries() {
        let reader = reader_for(vec![build_entry(TAG_TYPE_NAME, b"MyClass")]);
        assert_eq!(reader.entry_count(), 1);
        assert_eq!(reader.read_name(0), Some("MyClass".to_string()));
    }

    #[test]
    fn parse_term_name() {
        let reader = reader_for(vec![build_entry(TAG_TERM_NAME, b"myVal")]);
        assert_eq!(reader.read_name(0), Some("myVal".to_string()));
    }

    #[test]
    fn read_name_wrong_tag_returns_none() {
        let reader = reader_for(vec![build_entry(TAG_CLASS_SYM, b"data")]);
        assert_eq!(reader.read_name(0), None);
    }

    #[test]
    fn read_name_out_of_bounds_returns_none() {
        assert_eq!(reader_for(vec![]).read_name(0), None);
    }

    #[test]
    fn parse_class_sym_entry() {
        let reader = reader_for(vec![
            build_entry(TAG_TYPE_NAME, b"Point"),
            build_entry(TAG_NONE_SYM, &[]),
            build_entry(TAG_CLASS_SYM, &symbol_payload(0, 1, FLAG_CASE, 0)),
        ]);
        let entry = reader.entry(2).unwrap();
        assert_eq!(entry.tag, TAG_CLASS_SYM);
        let info = reader.read_symbol_info(entry).unwrap();
        assert_eq!(info.name_index, 0);
        assert_eq!(info.owner_index, 1);
        assert_eq!(info.flags & FLAG_CASE, FLAG_CASE);
        assert_eq!(reader.read_name(info.name_index), Some("Point".to_string()));
    }

    #[test]
    fn parse_module_sym_entry() {
        let reader = reader_for(vec![
            build_entry(TAG_TERM_NAME, b"Config"),
            build_entry(TAG_NONE_SYM, &[]),
            build_entry(TAG_MODULE_SYM, &symbol_payload(0, 1, FLAG_MODULE, 0)),
        ]);
        let entry = reader.entry(2).unwrap();
        assert_eq!(entry.tag, TAG_MODULE_SYM);
        let info = reader.read_symbol_info(entry).unwrap();
        assert_eq!(info.flags & FLAG_MODULE, FLAG_MODULE);
    }

    #[test]
    fn class_and_module_symbols_finds_all() {
        let reader = reader_for(vec![
            build_entry(TAG_TYPE_NAME, b"A"),
            build_entry(TAG_TERM_NAME, b"B"),
            build_entry(TAG_NONE_SYM, &[]),
            build_entry(TAG_CLASS_SYM, &symbol_payload(0, 2, 0, 0)),
            build_entry(TAG_MODULE_SYM, &symbol_payload(1, 2, 0, 0)),
        ]);
        let symbols = reader.class_and_module_symbols();
        assert_eq!(symbols.len(), 2);
        assert_eq!(symbols[0].0, 3);
        assert_eq!(symbols[1].0, 4);
    }

    #[test]
    fn ext_ref_name_resolution() {
        let reader = reader_for(vec![
            build_entry(TAG_TERM_NAME, b"scala"),
            // Payload: name index 0.
            build_entry(TAG_EXT_REF, &encode_nat(0)),
        ]);
        let ext_entry = reader.entry(1).unwrap();
        assert_eq!(
            reader.read_ext_ref_name(ext_entry),
            Some("scala".to_string())
        );
    }

    #[test]
    fn ext_ref_with_owner() {
        // Payload of the reference under test: name index 0, owner index 2.
        let mut ext_data = encode_nat(0);
        ext_data.extend(encode_nat(2));
        let reader = reader_for(vec![
            build_entry(TAG_TERM_NAME, b"Option"),
            build_entry(TAG_TERM_NAME, b"scala"),
            build_entry(TAG_EXT_MOD_CLASS_REF, &encode_nat(1)),
            build_entry(TAG_EXT_REF, &ext_data),
        ]);
        let entry = reader.entry(3).unwrap();
        assert_eq!(reader.read_ext_ref_owner(entry), Some(2));
    }

    #[test]
    fn unsupported_major_version_returns_none() {
        let mut sig = build_signature(vec![]);
        sig[0] = 4;
        assert!(ScalaSignatureReader::parse(&sig).is_none());
    }

    #[test]
    fn too_short_returns_none() {
        assert!(ScalaSignatureReader::parse(&[5]).is_none());
        assert!(ScalaSignatureReader::parse(&[]).is_none());
    }

    #[test]
    fn truncated_entry_returns_none() {
        // One entry announced; it claims 100 payload bytes that never follow.
        let mut data = vec![5, 0];
        data.extend(encode_nat(1));
        data.push(TAG_TYPE_NAME);
        data.extend(encode_nat(100));
        assert!(ScalaSignatureReader::parse(&data).is_none());
    }

    #[test]
    fn symbol_info_from_non_symbol_returns_none() {
        let reader = reader_for(vec![build_entry(TAG_TYPE_NAME, b"Foo")]);
        let entry = reader.entry(0).unwrap();
        assert!(reader.read_symbol_info(entry).is_none());
    }

    #[test]
    fn trait_flag_detection() {
        let flags = FLAG_TRAIT | FLAG_INTERFACE | FLAG_ABSTRACT;
        let reader = reader_for(vec![
            build_entry(TAG_TYPE_NAME, b"Functor"),
            build_entry(TAG_NONE_SYM, &[]),
            build_entry(TAG_CLASS_SYM, &symbol_payload(0, 1, flags, 0)),
        ]);
        let entry = reader.entry(2).unwrap();
        let info = reader.read_symbol_info(entry).unwrap();
        assert_ne!(info.flags & FLAG_TRAIT, 0);
        assert_ne!(info.flags & FLAG_INTERFACE, 0);
        assert_ne!(info.flags & FLAG_ABSTRACT, 0);
    }

    #[test]
    fn sealed_flag_detection() {
        let flags = FLAG_SEALED | FLAG_ABSTRACT | FLAG_TRAIT | FLAG_INTERFACE;
        let reader = reader_for(vec![
            build_entry(TAG_TYPE_NAME, b"Expr"),
            build_entry(TAG_NONE_SYM, &[]),
            build_entry(TAG_CLASS_SYM, &symbol_payload(0, 1, flags, 0)),
        ]);
        let entry = reader.entry(2).unwrap();
        let info = reader.read_symbol_info(entry).unwrap();
        assert_ne!(info.flags & FLAG_SEALED, 0);
    }

    #[test]
    fn private_and_protected_flags() {
        let reader = reader_for(vec![
            build_entry(TAG_TYPE_NAME, b"Inner"),
            build_entry(TAG_NONE_SYM, &[]),
            build_entry(TAG_CLASS_SYM, &symbol_payload(0, 1, FLAG_PRIVATE, 0)),
        ]);
        let entry = reader.entry(2).unwrap();
        let info = reader.read_symbol_info(entry).unwrap();
        assert_ne!(info.flags & FLAG_PRIVATE, 0);
        assert_eq!(info.flags & FLAG_PROTECTED, 0);
    }

    #[test]
    fn large_nat_flag_value() {
        // A value above 32 bits must survive an encode/decode round trip.
        let (decoded, _) = decode(&encode_nat(FLAG_TRAIT));
        let decoded = decoded.unwrap();
        assert_eq!(decoded, FLAG_TRAIT);
        assert_eq!(decoded, 1 << 36);
    }
}