Skip to main content

harn_hostlib/code_index/
file_table.rs

1//! Per-file metadata.
2//!
3//! `IndexedFile` holds the structural data the index needs at query time —
4//! language, size, content hash, raw import strings, and the list of
5//! outline symbols. `FileId` is a monotonically-assigned `u32` that all
6//! sub-indexes (`TrigramIndex`, `WordIndex`, `DepGraph`, `VersionLog`) key
7//! on so re-indexing a path doesn't have to invalidate string keys.
8
/// Numeric handle for a file tracked by the index.
///
/// Handles are assigned monotonically, and a path keeps the same handle
/// when it is re-indexed. Sub-indexes key on `FileId`, so a re-index never
/// forces them to invalidate string keys.
pub type FileId = u32;
13
/// One entry in a file's symbol outline.
///
/// Entries are filled in during the index rebuild, from the same
/// tree-sitter parse that backs the typed symbol graph (issue #2456).
/// When a file's extension doesn't map to a known grammar,
/// `IndexedFile::symbols` stays empty and no entries are produced.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct IndexedSymbol {
    /// Name of the symbol, e.g. `"helper"`.
    pub name: String,
    /// Kind of declaration as named by the language (`"function"`,
    /// `"struct"`, …).
    pub kind: String,
    /// Normalized access level of the declaration, if it is known.
    pub access_level: Option<String>,
    /// First line of the symbol (1-based).
    pub start_line: u32,
    /// Last line of the symbol (1-based, inclusive).
    pub end_line: u32,
    /// One-line signature or preview of the declaration.
    pub signature: String,
}
33
34/// Per-file metadata persisted in the index.
35#[derive(Debug, Clone)]
36pub struct IndexedFile {
37    /// Stable file identifier.
38    pub id: FileId,
39    /// Workspace-relative path with `/` separators. The empty string is
40    /// reserved for the root and never appears in the table.
41    pub relative_path: String,
42    /// Best-effort language tag (e.g. `"rust"`, `"swift"`, `"python"`). For
43    /// unrecognised extensions this is the extension itself.
44    pub language: String,
45    /// File size in bytes (UTF-8 contents).
46    pub size_bytes: u64,
47    /// Newline-delimited line count.
48    pub line_count: u32,
49    /// FNV-1a 64-bit content hash, used for cheap change detection.
50    pub content_hash: u64,
51    /// Last-modified time in milliseconds since the Unix epoch.
52    pub mtime_ms: i64,
53    /// Outline symbols extracted from the tree-sitter parse driven by
54    /// [`super::IndexState::rebuild_symbol_graph_for`]. Empty for files
55    /// without a recognised grammar.
56    pub symbols: Vec<IndexedSymbol>,
57    /// Raw import statement strings extracted from the file.
58    pub imports: Vec<String>,
59}
60
/// Computes the 64-bit FNV-1a hash of `bytes`.
///
/// The state starts at the offset basis; each input byte is XORed into
/// the state, which is then multiplied by the FNV prime using wrapping
/// arithmetic. An empty slice therefore hashes to the offset basis.
///
/// The result is used for content-change detection and snapshot
/// compatibility, so the constants below must stay as they are.
pub fn fnv1a64(bytes: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    bytes
        .iter()
        .fold(OFFSET_BASIS, |state, &byte| (state ^ u64::from(byte)).wrapping_mul(PRIME))
}
71
#[cfg(test)]
mod tests {
    use super::fnv1a64;

    /// Pins `fnv1a64` to fixed reference values. FNV-1a 64-bit is
    /// deterministic, so any drift here (for instance a different seed or
    /// prime) would silently break shared snapshot interop.
    #[test]
    fn fnv_matches_swift_reference() {
        let vectors: [(&[u8], u64); 2] = [
            (&b"hello world"[..], 0x779a_65e7_023c_d2e7),
            (&b""[..], 0xcbf2_9ce4_8422_2325),
        ];

        for (input, expected) in vectors {
            assert_eq!(fnv1a64(input), expected);
        }
    }
}