// harn_hostlib/code_index/file_table.rs

//! Per-file metadata.
//!
//! `IndexedFile` holds the structural data the index needs at query time —
//! language, size, content hash, raw import strings, and the list of
//! outline symbols. `FileId` is a monotonically-assigned `u32` that all
//! sub-indexes (`TrigramIndex`, `WordIndex`, `DepGraph`, `VersionLog`) key
//! on so re-indexing a path doesn't have to invalidate string keys.

/// Monotonically-assigned identifier for a file in the index. Stable
/// across re-indexes of the same path so sub-indexes can key on `FileId`
/// without invalidating string keys.
pub type FileId = u32;

/// Outline-style symbol entry. Reserved for AST integration; the code-index
/// importer leaves `IndexedFile::symbols` empty, but the shape is kept stable
/// so storage upgrades won't have to re-key.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexedSymbol {
    /// Symbol name (e.g. `"helper"`).
    pub name: String,
    /// Language-specific kind (`"function"`, `"struct"`, …).
    pub kind: String,
    /// 1-based start line.
    pub start_line: u32,
    /// 1-based inclusive end line.
    pub end_line: u32,
    /// Single-line signature/preview of the declaration.
    pub signature: String,
}

31/// Per-file metadata persisted in the index.
32#[derive(Debug, Clone)]
33pub struct IndexedFile {
34    /// Stable file identifier.
35    pub id: FileId,
36    /// Workspace-relative path with `/` separators. The empty string is
37    /// reserved for the root and never appears in the table.
38    pub relative_path: String,
39    /// Best-effort language tag (e.g. `"rust"`, `"swift"`, `"python"`). For
40    /// unrecognised extensions this is the extension itself.
41    pub language: String,
42    /// File size in bytes (UTF-8 contents).
43    pub size_bytes: u64,
44    /// Newline-delimited line count.
45    pub line_count: u32,
46    /// FNV-1a 64-bit content hash, used for cheap change detection.
47    pub content_hash: u64,
48    /// Last-modified time in milliseconds since the Unix epoch.
49    pub mtime_ms: i64,
50    /// Outline symbols supplied by callers that have richer syntax context.
51    pub symbols: Vec<IndexedSymbol>,
52    /// Raw import statement strings extracted from the file.
53    pub imports: Vec<String>,
54}
55
/// Stable 64-bit FNV-1a hash used for content-change detection and
/// snapshot compatibility.
///
/// Starting from the 64-bit FNV offset basis, each input byte is XORed into
/// the running state, which is then multiplied by the FNV prime using
/// wrapping arithmetic. An empty slice therefore yields the offset basis
/// itself.
pub fn fnv1a64(bytes: &[u8]) -> u64 {
    // Both constants are part of the hash definition; changing either one
    // changes every hash this function produces.
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    bytes.iter().fold(OFFSET_BASIS, |state, &byte| {
        // FNV-1a order: XOR the byte in first, then multiply.
        (state ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fnv_matches_swift_reference() {
        // FNV-1a 64-bit is deterministic; this guards against accidental
        // changes (e.g. switching to a different seed/prime) that would
        // silently break shared snapshot interop.
        assert_eq!(fnv1a64(b"hello world"), 0x779a_65e7_023c_d2e7);
        // With no input bytes the hash is the untouched offset basis.
        assert_eq!(fnv1a64(b""), 0xcbf2_9ce4_8422_2325);
    }
}