use anyhow::Result;
use std::collections::HashMap;
/// Fixed-size, `#[repr(C)]` record describing one symbol.
///
/// The three string fields of a symbol are not stored inline; instead the
/// record holds `u32` offsets that are resolved through a string table.
///
/// # Layout
///
/// The record is exactly [`SymbolMetadataRec::SIZE`] (80) bytes and contains
/// **no implicit padding**: every byte belongs to a named field. This matters
/// because the record is reinterpreted as raw bytes for serialization, and
/// compiler-inserted padding bytes are uninitialized memory.
///
/// | bytes   | field              |
/// |---------|--------------------|
/// | 0..8    | `symbol_id`        |
/// | 8..12   | `name_offset`      |
/// | 12..16  | `fqn_offset`       |
/// | 16..20  | `file_path_offset` |
/// | 20      | `kind`             |
/// | 21      | `language`         |
/// | 22..24  | `_padding1`        |
/// | 24..32  | `_padding2`        |
/// | 32..80  | six `u64` position fields |
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct SymbolMetadataRec {
    /// Identifier of the symbol this record describes.
    pub symbol_id: u64,
    /// String-table offset of the symbol's short name.
    pub name_offset: u32,
    /// String-table offset of the symbol's fully qualified name.
    pub fqn_offset: u32,
    /// String-table offset of the path of the file containing the symbol.
    pub file_path_offset: u32,
    /// Symbol kind code (the code-to-kind mapping is defined by callers).
    pub kind: u8,
    /// Language code (the code-to-language mapping is defined by callers).
    pub language: u8,
    /// Explicit padding, bytes 22..24. Write as zero.
    pub _padding1: u16,
    /// Explicit padding, bytes 24..32. Write as zero.
    ///
    /// This is a `u64` (not a `u32`) on purpose: the following `u64` field
    /// must start at offset 32, so a 4-byte field here would leave 4 bytes of
    /// compiler-inserted padding at 28..32. Field offsets of all real data are
    /// unaffected by the width of this field.
    pub _padding2: u64,
    /// Start of the symbol's byte range.
    pub byte_start: u64,
    /// End of the symbol's byte range.
    pub byte_end: u64,
    /// Line on which the symbol starts.
    pub start_line: u64,
    /// Column at which the symbol starts.
    pub start_col: u64,
    /// Line on which the symbol ends.
    pub end_line: u64,
    /// Column at which the symbol ends.
    pub end_col: u64,
}

impl SymbolMetadataRec {
    /// Size in bytes of one record, in memory and in serialized form.
    pub const SIZE: usize = 80;
}

// Compile-time layout guards: the struct must be exactly `SIZE` bytes, and the
// sum of its field widths must equal that size (i.e. no implicit padding).
const _: () = {
    const FIELD_BYTES: usize = 8 + 3 * 4 + 2 + 2 + 8 + 6 * 8;
    assert!(FIELD_BYTES == SymbolMetadataRec::SIZE);
    assert!(std::mem::size_of::<SymbolMetadataRec>() == SymbolMetadataRec::SIZE);
};
// Marks the record as plain-old-data so it can be viewed as, and read back
// from, raw bytes via bytemuck.
// SAFETY contract (per bytemuck's `Pod` documentation): the type must be
// `#[repr(C)]`, consist only of `Pod` fields, accept every bit pattern, and
// contain no padding bytes.
// NOTE(review): the no-padding requirement depends on the exact field widths
// and order of `SymbolMetadataRec` — re-verify it whenever the struct changes.
unsafe impl bytemuck::Pod for SymbolMetadataRec {}
// `Zeroable` is a prerequisite of `Pod`: the all-zero bit pattern must be a
// valid value, which holds for a struct made only of integer fields.
unsafe impl bytemuck::Zeroable for SymbolMetadataRec {}
/// Owned description of a single symbol, with its strings held inline.
///
/// This is the value accepted by `SymbolMetadataStore::add` and produced by
/// `SymbolMetadataStore::get`. Inside the store the three `String` fields are
/// replaced by string-table offsets (see `SymbolMetadataRec`).
#[derive(Debug, Clone, PartialEq)]
pub struct SymbolMetadata {
/// Identifier of the symbol; used as the key in the store's metadata map.
pub symbol_id: u64,
/// Short name of the symbol; `find_by_name` matches on this value.
pub name: String,
/// Fully qualified name of the symbol; `find_by_fqn` matches on this value.
pub fqn: String,
/// Path of the file containing the symbol; `symbols_in_file` matches on this
/// value, and the store registers it in its file table when the symbol is added.
pub file_path: String,
/// Symbol kind code, stored and returned verbatim.
/// NOTE(review): the meaning of each code is not defined in this file — confirm with callers.
pub kind: u8,
/// Language code, stored and returned verbatim.
/// NOTE(review): the meaning of each code is not defined in this file — confirm with callers.
pub language: u8,
/// Start of the symbol's byte range (presumably an offset into the source file — confirm).
pub byte_start: u64,
/// End of the symbol's byte range (inclusive vs. exclusive is not established here — confirm).
pub byte_end: u64,
/// Line on which the symbol starts (0- vs. 1-based is not established here — confirm).
pub start_line: u64,
/// Column at which the symbol starts.
pub start_col: u64,
/// Line on which the symbol ends.
pub end_line: u64,
/// Column at which the symbol ends.
pub end_col: u64,
}
/// Append-only table of interned strings.
///
/// Every distinct string is stored once as its UTF-8 bytes followed by a
/// single `0` terminator; callers refer to a string by the `u32` byte offset
/// at which it starts.
///
/// Limitations inherent to this encoding:
/// * a string containing an interior NUL byte is stored, but [`StringTable::get`]
///   only returns the part before the first NUL;
/// * offsets are `u32`, so the table cannot address more than 4 GiB of data.
#[derive(Debug, Clone, Default)]
pub struct StringTable {
    /// Concatenated entries, each terminated by a `0` byte.
    data: Vec<u8>,
    /// Reverse index: string content -> offset of its first byte in `data`.
    offset_map: HashMap<String, u32>,
}

impl StringTable {
    /// Width of the little-endian `u64` length prefix in the serialized form.
    const LEN_PREFIX: usize = 8;

    /// Creates an empty table.
    pub fn new() -> Self {
        Self {
            data: Vec::new(),
            offset_map: HashMap::new(),
        }
    }

    /// Interns `s` and returns the offset at which it is stored.
    ///
    /// Adding a string that is already present returns the existing offset
    /// and stores nothing new.
    pub fn add(&mut self, s: &str) -> u32 {
        if let Some(&existing) = self.offset_map.get(s) {
            return existing;
        }
        // NOTE: offsets are u32 by format; a table larger than u32::MAX bytes
        // would wrap here. Callers are expected to stay far below that.
        let offset = self.data.len() as u32;
        self.data.reserve(s.len() + 1);
        self.data.extend_from_slice(s.as_bytes());
        self.data.push(0);
        self.offset_map.insert(s.to_owned(), offset);
        offset
    }

    /// Returns the string starting at `offset`.
    ///
    /// Yields `None` when `offset` is out of range, when no terminator follows
    /// it, or when the bytes up to the terminator are not valid UTF-8.
    pub fn get(&self, offset: u32) -> Option<String> {
        let tail = self.data.get(offset as usize..)?;
        let terminator = tail.iter().position(|&b| b == 0)?;
        // Validate in place and copy only once, after validation succeeded.
        std::str::from_utf8(&tail[..terminator])
            .ok()
            .map(str::to_owned)
    }

    /// Serializes the table as an 8-byte little-endian length followed by the
    /// raw string data.
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(Self::LEN_PREFIX + self.data.len());
        bytes.extend_from_slice(&(self.data.len() as u64).to_le_bytes());
        bytes.extend_from_slice(&self.data);
        bytes
    }

    /// Rebuilds a table from the output of [`StringTable::to_bytes`].
    ///
    /// Bytes after the declared data length are ignored. Entries that are not
    /// valid UTF-8 stay in the data but are not indexed. A trailing fragment
    /// without a terminator is not indexed either, because
    /// [`StringTable::get`] could never return it.
    ///
    /// # Errors
    ///
    /// Fails when `bytes` is shorter than the length prefix or shorter than
    /// the length the prefix declares.
    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
        if bytes.len() < Self::LEN_PREFIX {
            anyhow::bail!("String table too short");
        }
        let (prefix, payload) = bytes.split_at(Self::LEN_PREFIX);
        let declared_len = u64::from_le_bytes(prefix.try_into()?);
        // Compare in u64 space: the declared length is untrusted, and adding
        // it to the prefix width could overflow `usize` on corrupt input.
        if (payload.len() as u64) < declared_len {
            anyhow::bail!("String table data truncated");
        }
        // Lossless: declared_len <= payload.len(), which is a usize.
        let data = payload[..declared_len as usize].to_vec();

        let mut offset_map = HashMap::new();
        let mut start = 0usize;
        // Walk terminator to terminator; `start` never exceeds `data.len()`.
        while let Some(rel_end) = data[start..].iter().position(|&b| b == 0) {
            let end = start + rel_end;
            if let Ok(entry) = std::str::from_utf8(&data[start..end]) {
                offset_map.insert(entry.to_owned(), start as u32);
            }
            start = end + 1;
        }

        Ok(Self { data, offset_map })
    }

    /// Returns `true` when no string data has been stored.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns the number of distinct indexed strings.
    pub fn len(&self) -> usize {
        self.offset_map.len()
    }
}
/// Per-file bookkeeping kept by `FileTable`.
#[derive(Debug, Clone, Default)]
pub struct FileInfo {
/// Path of the file, exactly as it was registered with the table.
pub path: String,
// `hash`: content hash recorded by `FileTable::set_file_hash`; `None` until
// a hash has been set (the hash algorithm is chosen by the caller).
// `last_indexed_at`: seconds since the UNIX epoch at the moment the hash was
// last set; `0` for a file that was registered but never hashed.
pub hash: Option<String>, pub last_indexed_at: i64, }
#[derive(Debug, Clone, Default)]
pub struct FileTable {
path_to_id: HashMap<String, u32>,
id_to_info: HashMap<u32, FileInfo>,
next_id: u32,
}
impl FileTable {
pub fn new() -> Self {
Self {
path_to_id: HashMap::new(),
id_to_info: HashMap::new(),
next_id: 1, }
}
pub fn get_or_assign_id(&mut self, path: &str) -> u32 {
if let Some(&id) = self.path_to_id.get(path) {
return id;
}
let id = self.next_id;
self.next_id += 1;
self.path_to_id.insert(path.to_string(), id);
self.id_to_info.insert(
id,
FileInfo {
path: path.to_string(),
hash: None,
last_indexed_at: 0,
},
);
id
}
pub fn get_path(&self, id: u32) -> Option<&str> {
self.id_to_info.get(&id).map(|info| info.path.as_str())
}
pub fn get_info(&self, id: u32) -> Option<&FileInfo> {
self.id_to_info.get(&id)
}
pub fn get_info_by_path(&self, path: &str) -> Option<&FileInfo> {
self.path_to_id
.get(path)
.and_then(|&id| self.id_to_info.get(&id))
}
pub fn set_file_hash(&mut self, path: &str, hash: &str) {
let id = self.get_or_assign_id(path);
if let Some(info) = self.id_to_info.get_mut(&id) {
info.hash = Some(hash.to_string());
info.last_indexed_at = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs() as i64;
}
}
pub fn get_file_hash(&self, path: &str) -> Option<&str> {
self.get_info_by_path(path)
.and_then(|info| info.hash.as_deref())
}
pub fn get_id(&self, path: &str) -> Option<u32> {
self.path_to_id.get(path).copied()
}
pub fn file_count(&self) -> usize {
self.path_to_id.len()
}
pub fn all_paths(&self) -> Vec<&str> {
self.id_to_info
.values()
.map(|info| info.path.as_str())
.collect()
}
pub fn all_files(&self) -> Vec<&FileInfo> {
self.id_to_info.values().collect()
}
pub fn to_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
bytes.extend_from_slice(&(self.path_to_id.len() as u64).to_le_bytes());
for (id, info) in &self.id_to_info {
bytes.extend_from_slice(&id.to_le_bytes());
bytes.extend_from_slice(&(info.path.len() as u32).to_le_bytes());
let hash_len = info.hash.as_ref().map(|h| h.len()).unwrap_or(0) as u32;
bytes.extend_from_slice(&hash_len.to_le_bytes());
bytes.extend_from_slice(&info.last_indexed_at.to_le_bytes());
bytes.extend_from_slice(info.path.as_bytes());
if let Some(hash) = &info.hash {
bytes.extend_from_slice(hash.as_bytes());
}
}
bytes
}
pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
if bytes.len() < 8 {
anyhow::bail!("File table too short");
}
let count = u64::from_le_bytes(bytes[0..8].try_into()?) as usize;
let mut offset = 8;
let mut path_to_id = HashMap::new();
let mut id_to_info = HashMap::new();
let mut max_id = 0;
for _ in 0..count {
if offset + 20 > bytes.len() {
anyhow::bail!("File table entry truncated");
}
let id = u32::from_le_bytes(bytes[offset..offset + 4].try_into()?);
let path_len = u32::from_le_bytes(bytes[offset + 4..offset + 8].try_into()?) as usize;
let hash_len = u32::from_le_bytes(bytes[offset + 8..offset + 12].try_into()?) as usize;
let last_indexed_at = i64::from_le_bytes(bytes[offset + 12..offset + 20].try_into()?);
offset += 20;
if offset + path_len + hash_len > bytes.len() {
anyhow::bail!("File data truncated");
}
let path = String::from_utf8(bytes[offset..offset + path_len].to_vec())?;
offset += path_len;
let hash = if hash_len > 0 {
Some(String::from_utf8(
bytes[offset..offset + hash_len].to_vec(),
)?)
} else {
None
};
offset += hash_len;
path_to_id.insert(path.clone(), id);
id_to_info.insert(
id,
FileInfo {
path,
hash,
last_indexed_at,
},
);
max_id = max_id.max(id);
}
Ok(Self {
path_to_id,
id_to_info,
next_id: max_id + 1,
})
}
}
#[derive(Debug, Clone, Default)]
pub struct SymbolMetadataStore {
pub metadata: HashMap<u64, SymbolMetadataRec>,
pub strings: StringTable,
pub files: FileTable,
}
impl SymbolMetadataStore {
pub fn new() -> Self {
Self {
metadata: HashMap::new(),
strings: StringTable::new(),
files: FileTable::new(),
}
}
pub fn add(&mut self, meta: SymbolMetadata) {
let name_offset = self.strings.add(&meta.name);
let fqn_offset = self.strings.add(&meta.fqn);
let file_path_offset = self.strings.add(&meta.file_path);
self.files.get_or_assign_id(&meta.file_path);
let rec = SymbolMetadataRec {
symbol_id: meta.symbol_id,
name_offset,
fqn_offset,
file_path_offset,
kind: meta.kind,
language: meta.language,
_padding1: 0,
_padding2: 0,
byte_start: meta.byte_start,
byte_end: meta.byte_end,
start_line: meta.start_line,
start_col: meta.start_col,
end_line: meta.end_line,
end_col: meta.end_col,
};
self.metadata.insert(meta.symbol_id, rec);
}
pub fn get(&self, symbol_id: u64) -> Option<SymbolMetadata> {
let rec = self.metadata.get(&symbol_id)?;
Some(SymbolMetadata {
symbol_id: rec.symbol_id,
name: self.strings.get(rec.name_offset)?,
fqn: self.strings.get(rec.fqn_offset)?,
file_path: self.strings.get(rec.file_path_offset)?,
kind: rec.kind,
language: rec.language,
byte_start: rec.byte_start,
byte_end: rec.byte_end,
start_line: rec.start_line,
start_col: rec.start_col,
end_line: rec.end_line,
end_col: rec.end_col,
})
}
pub fn find_by_fqn(&self, fqn: &str) -> Option<u64> {
let target_offset = self.strings.offset_map.get(fqn)?;
self.metadata
.values()
.find(|rec| rec.fqn_offset == *target_offset)
.map(|rec| rec.symbol_id)
}
pub fn find_by_name(&self, name: &str) -> Vec<u64> {
let Some(&target_offset) = self.strings.offset_map.get(name) else {
return Vec::new();
};
self.metadata
.values()
.filter(|rec| rec.name_offset == target_offset)
.map(|rec| rec.symbol_id)
.collect()
}
pub fn symbols_in_file(&self, file_path: &str) -> Vec<u64> {
let Some(&target_offset) = self.strings.offset_map.get(file_path) else {
return Vec::new();
};
self.metadata
.values()
.filter(|rec| rec.file_path_offset == target_offset)
.map(|rec| rec.symbol_id)
.collect()
}
pub fn symbol_count(&self) -> usize {
self.metadata.len()
}
pub fn file_count(&self) -> usize {
self.files.file_count()
}
pub fn all_file_paths(&self) -> Vec<String> {
self.files
.all_paths()
.into_iter()
.map(|s| s.to_string())
.collect()
}
pub fn all_symbol_ids(&self) -> Vec<u64> {
self.metadata.keys().copied().collect()
}
pub fn to_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
bytes.extend_from_slice(&(self.metadata.len() as u64).to_le_bytes());
for rec in self.metadata.values() {
bytes.extend_from_slice(bytemuck::bytes_of(rec));
}
let string_bytes = self.strings.to_bytes();
bytes.extend_from_slice(&(string_bytes.len() as u64).to_le_bytes());
bytes.extend_from_slice(&string_bytes);
let file_bytes = self.files.to_bytes();
bytes.extend_from_slice(&(file_bytes.len() as u64).to_le_bytes());
bytes.extend_from_slice(&file_bytes);
bytes
}
pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
let mut offset = 0;
if bytes.len() < 8 {
anyhow::bail!("Symbol metadata too short for count");
}
let metadata_count = u64::from_le_bytes(bytes[offset..offset + 8].try_into()?) as usize;
offset += 8;
let mut metadata = HashMap::with_capacity(metadata_count);
let rec_size = std::mem::size_of::<SymbolMetadataRec>();
for _ in 0..metadata_count {
if offset + rec_size > bytes.len() {
anyhow::bail!("Metadata record truncated");
}
let rec_bytes = &bytes[offset..offset + rec_size];
let rec: SymbolMetadataRec = match bytemuck::try_from_bytes(rec_bytes) {
Ok(r) => *r,
Err(e) => anyhow::bail!("Failed to parse metadata record: {:?}", e),
};
offset += rec_size;
metadata.insert(rec.symbol_id, rec);
}
if offset + 8 > bytes.len() {
anyhow::bail!("Missing string table length");
}
let string_table_len = u64::from_le_bytes(bytes[offset..offset + 8].try_into()?) as usize;
offset += 8;
if offset + string_table_len > bytes.len() {
anyhow::bail!("String table truncated");
}
let strings = StringTable::from_bytes(&bytes[offset..offset + string_table_len])?;
offset += string_table_len;
if offset + 8 > bytes.len() {
anyhow::bail!("Missing file table length");
}
let file_table_len = u64::from_le_bytes(bytes[offset..offset + 8].try_into()?) as usize;
offset += 8;
if offset + file_table_len > bytes.len() {
anyhow::bail!("File table truncated");
}
let files = FileTable::from_bytes(&bytes[offset..offset + file_table_len])?;
Ok(Self {
metadata,
strings,
files,
})
}
pub fn set_file_hash(&mut self, path: &str, hash: &str) {
self.files.set_file_hash(path, hash);
}
pub fn get_file_hash(&self, path: &str) -> Option<&str> {
self.files.get_file_hash(path)
}
pub fn all_files(&self) -> Vec<&FileInfo> {
self.files.all_files()
}
pub fn get_file_info(&self, path: &str) -> Option<&FileInfo> {
self.files.get_info_by_path(path)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a symbol with the filler values most tests share: kind 1,
    /// language 1, bytes 0..10 and all line/column fields 0. Tests that need
    /// other values override them with struct-update syntax.
    fn symbol(symbol_id: u64, name: &str, fqn: &str, file_path: &str) -> SymbolMetadata {
        SymbolMetadata {
            symbol_id,
            name: name.to_string(),
            fqn: fqn.to_string(),
            file_path: file_path.to_string(),
            kind: 1,
            language: 1,
            byte_start: 0,
            byte_end: 10,
            start_line: 0,
            start_col: 0,
            end_line: 0,
            end_col: 0,
        }
    }

    // Interning: equal strings share an offset, distinct strings do not, and
    // offsets resolve back to the original text.
    #[test]
    fn test_string_table_basic() {
        let mut table = StringTable::new();
        let hello = table.add("hello");
        let world = table.add("world");
        let hello_again = table.add("hello");

        assert_eq!(hello, hello_again);
        assert_ne!(hello, world);
        assert_eq!(table.get(hello), Some("hello".to_string()));
        assert_eq!(table.get(world), Some("world".to_string()));
    }

    // A string table survives a to_bytes/from_bytes round trip.
    #[test]
    fn test_string_table_serialization() {
        let mut table = StringTable::new();
        for s in ["foo", "bar"] {
            table.add(s);
        }

        let restored = StringTable::from_bytes(&table.to_bytes()).unwrap();

        assert_eq!(restored.len(), 2);
        assert!(restored.get(0).is_some());
    }

    // Ids are stable per path and distinct across paths.
    #[test]
    fn test_file_table_basic() {
        let mut table = FileTable::new();
        let main_id = table.get_or_assign_id("/src/main.rs");
        let lib_id = table.get_or_assign_id("/src/lib.rs");
        let main_id_again = table.get_or_assign_id("/src/main.rs");

        assert_eq!(main_id, main_id_again);
        assert_ne!(main_id, lib_id);
        assert_eq!(table.file_count(), 2);
        assert_eq!(table.get_path(main_id), Some("/src/main.rs"));
        assert_eq!(table.get_id("/src/lib.rs"), Some(lib_id));
    }

    // A file table survives a to_bytes/from_bytes round trip.
    #[test]
    fn test_file_table_serialization() {
        let mut table = FileTable::new();
        for path in ["/a.rs", "/b.rs"] {
            table.get_or_assign_id(path);
        }

        let restored = FileTable::from_bytes(&table.to_bytes()).unwrap();

        assert_eq!(restored.file_count(), 2);
        assert!(restored.get_id("/a.rs").is_some());
        assert!(restored.get_id("/b.rs").is_some());
    }

    // Adding one symbol registers it and its file, and `get` resolves strings.
    #[test]
    fn test_symbol_metadata_store_basic() {
        let mut store = SymbolMetadataStore::new();
        let meta = SymbolMetadata {
            byte_start: 100,
            byte_end: 200,
            start_line: 10,
            end_line: 20,
            end_col: 1,
            ..symbol(1, "my_func", "crate::my_func", "/src/lib.rs")
        };
        store.add(meta.clone());

        assert_eq!(store.symbol_count(), 1);
        assert_eq!(store.file_count(), 1);

        let retrieved = store.get(1).unwrap();
        assert_eq!(retrieved.name, "my_func");
        assert_eq!(retrieved.fqn, "crate::my_func");
        assert_eq!(retrieved.file_path, "/src/lib.rs");
    }

    // Lookup by fully qualified name finds exactly the matching symbol.
    #[test]
    fn test_symbol_metadata_find_by_fqn() {
        let mut store = SymbolMetadataStore::new();
        store.add(symbol(1, "func1", "crate::module::func1", "/src/lib.rs"));
        store.add(SymbolMetadata {
            byte_start: 20,
            byte_end: 30,
            ..symbol(2, "func2", "crate::module::func2", "/src/lib.rs")
        });

        assert_eq!(store.find_by_fqn("crate::module::func1"), Some(1));
        assert_eq!(store.find_by_fqn("crate::module::func2"), Some(2));
        assert_eq!(store.find_by_fqn("nonexistent"), None);
    }

    // Lookup by short name returns every symbol sharing that name.
    #[test]
    fn test_symbol_metadata_find_by_name() {
        let mut store = SymbolMetadataStore::new();
        store.add(symbol(1, "foo", "crate::A::foo", "/src/a.rs"));
        store.add(symbol(2, "foo", "crate::B::foo", "/src/b.rs"));

        let results = store.find_by_name("foo");

        assert_eq!(results.len(), 2);
        assert!(results.contains(&1));
        assert!(results.contains(&2));
    }

    // Lookup by file returns only the symbols located in that file.
    #[test]
    fn test_symbol_metadata_symbols_in_file() {
        let mut store = SymbolMetadataStore::new();
        store.add(symbol(1, "func1", "crate::func1", "/src/main.rs"));
        store.add(symbol(2, "func2", "crate::func2", "/src/lib.rs"));

        let main_symbols = store.symbols_in_file("/src/main.rs");

        assert_eq!(main_symbols.len(), 1);
        assert_eq!(main_symbols[0], 1);
        assert_eq!(store.file_count(), 2);
    }

    // Every field of a symbol survives a store round trip.
    #[test]
    fn test_symbol_metadata_store_serialization() {
        let mut store = SymbolMetadataStore::new();
        store.add(SymbolMetadata {
            kind: 2,
            byte_start: 150,
            byte_end: 300,
            start_line: 15,
            start_col: 4,
            end_line: 25,
            end_col: 5,
            ..symbol(
                42,
                "test_function",
                "my_crate::test_function",
                "/home/user/project/src/lib.rs",
            )
        });

        let restored = SymbolMetadataStore::from_bytes(&store.to_bytes()).unwrap();

        assert_eq!(restored.symbol_count(), 1);
        assert_eq!(restored.file_count(), 1);

        let meta = restored.get(42).unwrap();
        assert_eq!(meta.name, "test_function");
        assert_eq!(meta.fqn, "my_crate::test_function");
        assert_eq!(meta.file_path, "/home/user/project/src/lib.rs");
        assert_eq!(meta.byte_start, 150);
        assert_eq!(meta.byte_end, 300);
        assert_eq!(meta.start_line, 15);
        assert_eq!(meta.start_col, 4);
        assert_eq!(meta.end_line, 25);
        assert_eq!(meta.end_col, 5);
    }

    // Ten symbols spread over three files all survive a round trip, and the
    // lookups keep working on the restored store.
    #[test]
    fn test_symbol_metadata_store_reopen_preserves_all() {
        let mut store = SymbolMetadataStore::new();
        for i in 0u64..10 {
            store.add(SymbolMetadata {
                symbol_id: i,
                name: format!("func{}", i),
                fqn: format!("crate::module::func{}", i),
                file_path: format!("/src/file{}.rs", i % 3),
                kind: (i % 5) as u8,
                language: 1,
                byte_start: i * 100,
                byte_end: i * 100 + 50,
                start_line: i,
                start_col: 0,
                end_line: i + 5,
                end_col: 1,
            });
        }

        let restored = SymbolMetadataStore::from_bytes(&store.to_bytes()).unwrap();

        assert_eq!(restored.symbol_count(), 10);
        assert_eq!(restored.file_count(), 3);

        for i in 0u64..10 {
            let meta = restored.get(i).unwrap();
            assert_eq!(meta.name, format!("func{}", i));
            assert_eq!(meta.fqn, format!("crate::module::func{}", i));
            assert_eq!(restored.find_by_fqn(&meta.fqn), Some(i));
        }

        // Ids 0, 3, 6 and 9 map to file0.
        let file0_symbols = restored.symbols_in_file("/src/file0.rs");
        assert_eq!(file0_symbols.len(), 4);
    }
}