// harn_hostlib/code_index/file_table.rs

//! Per-file metadata.
//!
//! `IndexedFile` holds the structural data the index needs at query time —
//! language, size, content hash, raw import strings, and the list of
//! outline symbols. `FileId` is a monotonically-assigned `u32` that all
//! sub-indexes (`TrigramIndex`, `WordIndex`, `DepGraph`, `VersionLog`) key
//! on so re-indexing a path doesn't have to invalidate string keys.

/// Identifier for a file in the index, assigned monotonically.
///
/// The id stays stable across re-indexes of the same path, so sub-indexes
/// can key on `FileId` without invalidating string keys.
pub type FileId = u32;

/// Outline-style symbol entry.
///
/// Populated during the index rebuild from the same tree-sitter parse that
/// backs the typed symbol graph (issue #2456). Files whose extension doesn't
/// map to a known grammar leave `IndexedFile::symbols` empty.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexedSymbol {
    /// Symbol name (e.g. `"helper"`).
    pub name: String,
    /// Language-specific kind (`"function"`, `"struct"`, …).
    pub kind: String,
    /// 1-based line on which the symbol starts.
    pub start_line: u32,
    /// 1-based line on which the symbol ends (inclusive).
    pub end_line: u32,
    /// Single-line signature/preview of the declaration.
    pub signature: String,
}

32/// Per-file metadata persisted in the index.
33#[derive(Debug, Clone)]
34pub struct IndexedFile {
35    /// Stable file identifier.
36    pub id: FileId,
37    /// Workspace-relative path with `/` separators. The empty string is
38    /// reserved for the root and never appears in the table.
39    pub relative_path: String,
40    /// Best-effort language tag (e.g. `"rust"`, `"swift"`, `"python"`). For
41    /// unrecognised extensions this is the extension itself.
42    pub language: String,
43    /// File size in bytes (UTF-8 contents).
44    pub size_bytes: u64,
45    /// Newline-delimited line count.
46    pub line_count: u32,
47    /// FNV-1a 64-bit content hash, used for cheap change detection.
48    pub content_hash: u64,
49    /// Last-modified time in milliseconds since the Unix epoch.
50    pub mtime_ms: i64,
51    /// Outline symbols extracted from the tree-sitter parse driven by
52    /// [`super::IndexState::rebuild_symbol_graph_for`]. Empty for files
53    /// without a recognised grammar.
54    pub symbols: Vec<IndexedSymbol>,
55    /// Raw import statement strings extracted from the file.
56    pub imports: Vec<String>,
57}
58
/// Stable 64-bit FNV-1a hash used for content-change detection and
/// snapshot compatibility.
///
/// Starts from the FNV offset basis and, for each input byte in order,
/// XORs the byte into the running state and then multiplies by the FNV
/// prime, wrapping on overflow. An empty slice returns the offset basis
/// unchanged.
#[must_use]
pub fn fnv1a64(bytes: &[u8]) -> u64 {
    // Both constants must stay fixed: the module's own test pins the output
    // so that snapshots shared with other implementations keep matching.
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    bytes.iter().fold(OFFSET_BASIS, |hash, &byte| {
        // FNV-1a order: XOR first, multiply second.
        (hash ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fnv_matches_swift_reference() {
        // FNV-1a 64-bit is deterministic; this guards against accidental
        // changes (e.g. switching to a different seed/prime) that would
        // silently break shared snapshot interop.
        assert_eq!(fnv1a64(b"hello world"), 0x779a_65e7_023c_d2e7);
        assert_eq!(fnv1a64(b""), 0xcbf2_9ce4_8422_2325);
    }

    #[test]
    fn fnv_matches_published_vectors() {
        // Standard FNV-1a 64-bit test vectors; a mismatch here means the
        // XOR/multiply order or one of the constants has drifted.
        assert_eq!(fnv1a64(b"a"), 0xaf63_dc4c_8601_ec8c);
        assert_eq!(fnv1a64(b"foobar"), 0x8594_4171_f739_67e8);
    }
}