harn_hostlib/code_index/file_table.rs
1//! Per-file metadata.
2//!
3//! `IndexedFile` holds the structural data the index needs at query time —
4//! language, size, content hash, raw import strings, and the list of
5//! outline symbols. `FileId` is a monotonically-assigned `u32` that all
6//! sub-indexes (`TrigramIndex`, `WordIndex`, `DepGraph`, `VersionLog`) key
7//! on so re-indexing a path doesn't have to invalidate string keys.
8
/// Numeric handle for a file tracked by the index.
///
/// Ids are handed out monotonically and stay the same when a path is
/// re-indexed, which lets the sub-indexes key on `FileId` rather than on
/// path strings that would otherwise need invalidating.
pub type FileId = u32;
13
/// One entry in a file's symbol outline.
///
/// Entries are filled in during the index rebuild, from the same
/// tree-sitter parse that backs the typed symbol graph (issue #2456).
/// When a file's extension doesn't map to a known grammar,
/// `IndexedFile::symbols` stays empty.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct IndexedSymbol {
    /// Name of the symbol, e.g. `"helper"`.
    pub name: String,
    /// Kind of declaration, as named by the language
    /// (`"function"`, `"struct"`, …).
    pub kind: String,
    /// First line of the symbol (1-based).
    pub start_line: u32,
    /// Last line of the symbol (1-based, inclusive).
    pub end_line: u32,
    /// One-line signature or preview of the declaration.
    pub signature: String,
}
31
32/// Per-file metadata persisted in the index.
33#[derive(Debug, Clone)]
34pub struct IndexedFile {
35 /// Stable file identifier.
36 pub id: FileId,
37 /// Workspace-relative path with `/` separators. The empty string is
38 /// reserved for the root and never appears in the table.
39 pub relative_path: String,
40 /// Best-effort language tag (e.g. `"rust"`, `"swift"`, `"python"`). For
41 /// unrecognised extensions this is the extension itself.
42 pub language: String,
43 /// File size in bytes (UTF-8 contents).
44 pub size_bytes: u64,
45 /// Newline-delimited line count.
46 pub line_count: u32,
47 /// FNV-1a 64-bit content hash, used for cheap change detection.
48 pub content_hash: u64,
49 /// Last-modified time in milliseconds since the Unix epoch.
50 pub mtime_ms: i64,
51 /// Outline symbols extracted from the tree-sitter parse driven by
52 /// [`super::IndexState::rebuild_symbol_graph_for`]. Empty for files
53 /// without a recognised grammar.
54 pub symbols: Vec<IndexedSymbol>,
55 /// Raw import statement strings extracted from the file.
56 pub imports: Vec<String>,
57}
58
/// Computes the 64-bit FNV-1a hash of `bytes`.
///
/// The result is stable: it backs content-change detection and
/// snapshot compatibility, so the offset basis and prime below must not
/// change. An empty slice hashes to the offset basis.
pub fn fnv1a64(bytes: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a order: XOR the byte in first, then multiply by the prime.
    bytes.iter().fold(OFFSET_BASIS, |state, &octet| {
        (state ^ u64::from(octet)).wrapping_mul(PRIME)
    })
}
69
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fnv_matches_swift_reference() {
        // FNV-1a 64-bit is deterministic, so pinning known outputs
        // catches an accidental change of seed or prime that would
        // silently break shared snapshot interop.
        let cases: [(&[u8], u64); 2] = [
            (&b"hello world"[..], 0x779a_65e7_023c_d2e7),
            (&b""[..], 0xcbf2_9ce4_8422_2325),
        ];
        for (input, expected) in cases {
            assert_eq!(fnv1a64(input), expected);
        }
    }
}
82}