harn_hostlib/code_index/file_table.rs
1//! Per-file metadata.
2//!
3//! `IndexedFile` holds the structural data the index needs at query time —
4//! language, size, content hash, raw import strings, and the list of
5//! outline symbols. `FileId` is a monotonically-assigned `u32` that all
6//! sub-indexes (`TrigramIndex`, `WordIndex`, `DepGraph`, `VersionLog`) key
7//! on so re-indexing a path doesn't have to invalidate string keys.
8
/// Numeric handle for a file tracked by the index.
///
/// Ids are handed out monotonically and stay the same when a path is
/// re-indexed, which lets sub-indexes key on `FileId` instead of on path
/// strings that would otherwise need invalidating.
pub type FileId = u32;
13
/// One entry in a file's symbol outline.
///
/// This type is reserved for AST integration: the code-index importer
/// currently leaves `IndexedFile::symbols` empty. The field layout is kept
/// stable regardless, so that storage upgrades won't have to re-key.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexedSymbol {
    /// Name of the symbol, for example `"helper"`.
    pub name: String,
    /// Kind of symbol as the language names it (`"function"`, `"struct"`, …).
    pub kind: String,
    /// Line on which the symbol starts (1-based).
    pub start_line: u32,
    /// Line on which the symbol ends (1-based, inclusive).
    pub end_line: u32,
    /// One-line signature or preview of the declaration.
    pub signature: String,
}
30
31/// Per-file metadata persisted in the index.
32#[derive(Debug, Clone)]
33pub struct IndexedFile {
34 /// Stable file identifier.
35 pub id: FileId,
36 /// Workspace-relative path with `/` separators. The empty string is
37 /// reserved for the root and never appears in the table.
38 pub relative_path: String,
39 /// Best-effort language tag (e.g. `"rust"`, `"swift"`, `"python"`). For
40 /// unrecognised extensions this is the extension itself.
41 pub language: String,
42 /// File size in bytes (UTF-8 contents).
43 pub size_bytes: u64,
44 /// Newline-delimited line count.
45 pub line_count: u32,
46 /// FNV-1a 64-bit content hash, used for cheap change detection.
47 pub content_hash: u64,
48 /// Last-modified time in milliseconds since the Unix epoch.
49 pub mtime_ms: i64,
50 /// Outline symbols supplied by callers that have richer syntax context.
51 pub symbols: Vec<IndexedSymbol>,
52 /// Raw import statement strings extracted from the file.
53 pub imports: Vec<String>,
54}
55
/// Computes the 64-bit FNV-1a hash of `bytes`.
///
/// The running state starts at the offset basis `0xcbf29ce484222325`. Each
/// input byte is XORed into the state, which is then multiplied (wrapping
/// on overflow) by the prime `0x100000001b3`. Empty input therefore yields
/// the offset basis unchanged.
///
/// The result is used for content-change detection and snapshot
/// compatibility, so the constants must stay exactly as they are.
pub fn fnv1a64(bytes: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    bytes
        .iter()
        .fold(OFFSET_BASIS, |state, &b| (state ^ u64::from(b)).wrapping_mul(PRIME))
}
66
#[cfg(test)]
mod tests {
    use super::fnv1a64;

    #[test]
    fn fnv_matches_swift_reference() {
        // FNV-1a 64-bit is deterministic, so these outputs are pinned. A
        // change to the seed or prime would silently break shared snapshot
        // interop; this table makes such a change fail loudly instead.
        let pinned: [(&[u8], u64); 2] = [
            (b"hello world", 0x779a_65e7_023c_d2e7),
            // Empty input leaves the offset basis untouched.
            (b"", 0xcbf2_9ce4_8422_2325),
        ];

        for (input, expected) in pinned {
            assert_eq!(fnv1a64(input), expected);
        }
    }
}
79}