#![deny(missing_docs)]
use std::{collections::HashMap, fmt::Debug, io::Write};
use ksym::TOKEN_MARKER;
const PREFIX_CANDIDATE_LENS: &[usize] = &[
10, 24, 31, 40, 56, 60, 70, 80, 90, 100, 150, 200, 250, 300, 400, 500, 600, 700, 800, 900,
1000, 1200, 1400, 1600, 1800, 2000,
];
const MAX_TOKEN: usize = 512;
pub struct KallsymsBlob {
pub token_table: Vec<u8>,
pub token_index: Vec<u32>,
pub token_map: HashMap<String, u16>,
pub kallsyms_names: Vec<u8>,
pub kallsyms_offsets: Vec<u32>,
pub kallsyms_seqs_of_names: Vec<u32>,
pub kallsyms_addresses: Vec<u64>,
pub kallsyms_num_syms: usize,
}
impl Debug for KallsymsBlob {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("KallsymsBlob")
.field("token_table size", &(self.token_table.len()))
.field("token_index size", &(self.token_index.len() * 4))
.field("kallsyms_names size", &(self.kallsyms_names.len()))
.field("kallsyms_offsets size", &(self.kallsyms_offsets.len() * 4))
.field(
"kallsyms_seqs_of_names size",
&(self.kallsyms_seqs_of_names.len() * 4),
)
.field(
"kallsyms_addresses size",
&(self.kallsyms_addresses.len() * 8),
)
.field("kallsyms_num_syms", &self.kallsyms_num_syms)
.finish()
}
}
impl Default for KallsymsBlob {
fn default() -> Self {
Self::new()
}
}
impl KallsymsBlob {
pub fn new() -> Self {
Self {
token_table: Vec::new(),
token_index: Vec::new(),
token_map: HashMap::new(),
kallsyms_names: Vec::new(),
kallsyms_offsets: Vec::new(),
kallsyms_seqs_of_names: Vec::new(),
kallsyms_addresses: Vec::new(),
kallsyms_num_syms: 0,
}
}
fn add_token(&mut self, token: String) -> Option<u16> {
if let Some(&id) = self.token_map.get(&token) {
return Some(id);
}
let id = self.token_map.len() as u16;
self.token_index.push(self.token_table.len() as u32);
self.token_table.extend_from_slice(token.as_bytes());
self.token_map.insert(token, id);
Some(id)
}
pub fn compress_symbols(&mut self, symbols: &[(String, u64, char)]) {
let n = symbols.len();
if n == 0 {
return;
}
let mut idx_by_addr: Vec<usize> = (0..n).collect();
idx_by_addr.sort_by_key(|&i| symbols[i].1);
let mut idx_by_name: Vec<usize> = (0..n).collect();
idx_by_name.sort_by(|&i, &j| symbols[i].0.cmp(&symbols[j].0));
let mut orig_to_addr_idx = vec![0usize; n];
for (addr_pos, &orig_idx) in idx_by_addr.iter().enumerate() {
orig_to_addr_idx[orig_idx] = addr_pos;
}
let mut token_count: HashMap<String, usize> = HashMap::new();
for (name, _, _) in symbols.iter() {
let bytes = name.as_bytes();
for &len in PREFIX_CANDIDATE_LENS {
if bytes.len() >= len {
let token = std::str::from_utf8(&bytes[..len]).unwrap();
*token_count.entry(token.to_string()).or_insert(0) += 1;
} else if !bytes.is_empty() && len == PREFIX_CANDIDATE_LENS[0] {
let token = name;
*token_count.entry(token.to_string()).or_insert(0) += 1;
}
}
}
let mut tokens: Vec<(String, usize)> = token_count.into_iter().collect();
tokens.sort_by(|a, b| {
(b.1 * b.0.len()).cmp(&(a.1 * a.0.len())) });
let mut final_token_list: Vec<String> = Vec::new();
for (tok, _) in tokens.into_iter() {
if final_token_list.len() >= MAX_TOKEN {
break;
}
let mut is_prefix = false;
for existing in &final_token_list {
if existing.starts_with(&tok) {
is_prefix = true;
break;
}
}
if !is_prefix {
final_token_list.push(tok);
}
}
for tok in final_token_list.into_iter() {
self.add_token(tok);
}
for &orig_idx in &idx_by_addr {
let (ref sym, addr, ty) = symbols[orig_idx];
self.kallsyms_offsets.push(self.kallsyms_names.len() as u32);
self.kallsyms_addresses.push(addr);
let sym_bytes = sym.as_bytes();
let rem = sym_bytes.len();
let mut consumed = 0usize;
for &l in PREFIX_CANDIDATE_LENS.iter().rev() {
if l > rem {
continue;
}
let candidate = &sym[..l];
if let Some(&id) = self.token_map.get(candidate) {
let mut length: u16 = if id < 256 { 3 } else { 4 };
length += (rem - l) as u16;
self.kallsyms_names.push(ty as u8);
self.kallsyms_names.push((length & 0xFF) as u8);
self.kallsyms_names.push((length >> 8) as u8);
self.kallsyms_names.push(TOKEN_MARKER);
if id < 256 {
self.kallsyms_names.push(id as u8);
} else {
self.kallsyms_names.push((id >> 8) as u8);
self.kallsyms_names.push((id & 0xFF) as u8);
}
self.kallsyms_names.push(TOKEN_MARKER);
consumed = l;
break;
}
}
if consumed == 0 {
self.kallsyms_names.push(ty as u8);
let length = rem as u16;
self.kallsyms_names.push((length & 0xFF) as u8);
self.kallsyms_names.push((length >> 8) as u8);
}
self.kallsyms_names
.extend_from_slice(&sym_bytes[consumed..]);
}
self.kallsyms_seqs_of_names.reserve(n);
for &orig_idx in &idx_by_name {
let addr_idx = orig_to_addr_idx[orig_idx] as u32;
self.kallsyms_seqs_of_names.push(addr_idx);
}
self.kallsyms_num_syms = n;
}
pub fn to_blob(&self) -> Vec<u8> {
let mut blob = Vec::new();
#[inline]
fn pad(vec: &mut Vec<u8>, align: usize) {
let rem = vec.len() % align;
if rem != 0 {
vec.resize(vec.len() + (align - rem), 0);
}
}
blob.extend_from_slice(&(self.kallsyms_num_syms as u64).to_le_bytes());
pad(&mut blob, 8);
for &addr in &self.kallsyms_addresses {
blob.extend_from_slice(&addr.to_le_bytes());
}
pad(&mut blob, 4);
for &off in &self.kallsyms_offsets {
blob.extend_from_slice(&off.to_le_bytes());
}
pad(&mut blob, 4);
for &seq in &self.kallsyms_seqs_of_names {
blob.extend_from_slice(&seq.to_le_bytes());
}
pad(&mut blob, 8);
blob.extend_from_slice(&(self.kallsyms_names.len() as u64).to_le_bytes());
blob.extend_from_slice(&self.kallsyms_names);
pad(&mut blob, 8);
blob.extend_from_slice(&(self.token_table.len() as u64).to_le_bytes());
blob.extend_from_slice(&self.token_table);
pad(&mut blob, 8);
blob.extend_from_slice(&(self.token_index.len() as u64).to_le_bytes());
pad(&mut blob, 4);
for &idx in &self.token_index {
blob.extend_from_slice(&idx.to_le_bytes());
}
blob
}
}
fn read_symbol(line: &str) -> Option<(String, u64, char)> {
if line.len() > 4096 {
panic!("The kernel symbol is too long: {}", line);
}
let mut parts = line.split_whitespace();
let vaddr = u64::from_str_radix(parts.next()?, 16).ok()?;
let symbol_type = parts.next()?.chars().next()?;
let symbol = parts.collect::<Vec<_>>().join(" ");
Some((symbol, vaddr, symbol_type))
}
fn read_map() -> Vec<(String, u64, char)> {
let mut symbol_table = Vec::new();
let mut line = String::new();
loop {
let size = std::io::stdin().read_line(&mut line).unwrap();
if size == 0 {
break;
}
line = line.trim().to_string();
if let Some(entry) = read_symbol(&line) {
symbol_table.push(entry);
}
line.clear();
}
symbol_table
}
fn main() {
let symbol_table = read_map();
let mut blob = KallsymsBlob::new();
blob.compress_symbols(&symbol_table);
let binary_blob = blob.to_blob();
std::io::stdout()
.write_all(&binary_blob)
.expect("Failed to write blob");
}
#[allow(unused)]
fn simple_test() {
use ksym::KSYM_NAME_LEN;
use crate::KallsymsBlob;
let symbols = r#"
0000000000001000 T do_mkdir
0000000000001300 T alias_do_fork
0000000000001300 T do_fork
0000000000001300 T do_fork_2
0000000000001200 T cpu_startup_entry
0000000000001400 T cpu_startup_entry
0000000000001500 T _ZN6axtask4task9TaskInner4name17h96bec46d2d8f1ab4E
0000000000001510 T _RNvXs6_NtNtCskdqSPfBY5oR_10kbpf_basic3map5arrayNtB5_17PerfEventArrayMapNtB7_15BpfMapCommonOps13for_each_elem
"#;
let symbols: Vec<(String, u64, char)> = symbols
.lines()
.filter_map(|line| read_symbol(line.trim()))
.collect();
println!("Original symbols:");
for (sym, addr, ty) in &symbols {
println!("{:016x} {} {}", addr, ty, sym);
}
let mut blob = KallsymsBlob::new();
blob.compress_symbols(&symbols);
let binary_blob = blob.to_blob();
println!("Binary blob size: {} bytes", binary_blob.len());
let mapped = ksym::KallsymsMapped::from_blob(&binary_blob, 0x1000, 0x1600).expect("parse blob");
assert_eq!(mapped.lookup_address(100, &mut [0; KSYM_NAME_LEN]), None);
assert_eq!(
mapped.lookup_address(0x1200, &mut [0; KSYM_NAME_LEN]),
Some(("cpu_startup_entry", 0x100, 0, 'T'))
);
assert_eq!(
mapped.lookup_address(0x1300, &mut [0; KSYM_NAME_LEN]),
Some(("alias_do_fork", 0x100, 0, 'T'))
);
assert_eq!(
mapped.lookup_address(0x1250, &mut [0; KSYM_NAME_LEN]),
Some(("cpu_startup_entry", 0x100, 0x50, 'T'))
);
assert_eq!(
mapped.lookup_address(0x1450, &mut [0; KSYM_NAME_LEN]),
Some(("cpu_startup_entry", 0x100, 0x50, 'T'))
);
assert_eq!(
mapped.lookup_name("cpu_startup_entry"),
Some(0x1200),
"The address of cpu_startup_entry should be 0x1200 instead of 0x1400"
);
assert_eq!(mapped.lookup_name("do_fork_2"), Some(0x1300));
assert_eq!(
mapped.lookup_name("_ZN6axtask4task9TaskInner4name17h96bec46d2d8f1ab4E"),
Some(0x1500)
);
let addr = mapped.lookup_name("_RNvXs6_NtNtCsk9UQNouxT1Z_10kbpf_basic3map5arrayNtB5_17PerfEventArrayMapNtB7_15BpfMapCommonOps13for_each_elem");
assert_eq!(addr, None);
println!("All tests passed.");
let dumped = mapped.dump_all_symbols();
println!("Dumped all symbols:\n{}", dumped);
}
#[cfg(test)]
mod tests {
#[test]
fn test() {
super::simple_test();
}
}