// agent-source-repository 0.1.0
//
// Agent Source Repository: local context registry for coding agents.
// Documentation
use crate::Chunk;

/// 64-bit FNV-1a offset basis; the hashing functions below start from this state.
pub(crate) const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
/// 64-bit FNV prime; the state is multiplied by this after each byte is XORed in.
const FNV_PRIME: u64 = 0x100_0000_01b3;

/// FNV-1a over `bytes` with a per-call domain separator (`0xff` sentinel).
///
/// The sentinel ensures that adjacent fields hash differently even when one
/// field is empty: `update("ab", "")` ≠ `update("a", "b")`.
pub(crate) fn update_hash(hash: &mut u64, bytes: &[u8]) {
    for byte in bytes {
        *hash ^= u64::from(*byte);
        *hash = hash.wrapping_mul(FNV_PRIME);
    }
    *hash ^= 0xff;
    *hash = hash.wrapping_mul(FNV_PRIME);
}

/// Hashes a single string and renders the result as text.
///
/// The state starts at `FNV_OFFSET`, absorbs the UTF-8 bytes of `text` via
/// `update_hash` (which also appends its terminator byte), and is returned as
/// exactly 16 lowercase, zero-padded hexadecimal digits.
pub(crate) fn hash_text(text: &str) -> String {
    let mut digest = FNV_OFFSET;
    update_hash(&mut digest, text.as_bytes());
    format!("{digest:016x}")
}

/// Computes a stable fingerprint for a collection of chunks.
///
/// Chunks are visited in ascending `(file_path, start_line, end_line)` order;
/// the sort is stable, so chunks with identical keys keep their input order.
/// For each chunk the following fields are absorbed through `update_hash`, in
/// this order: file path, start line, end line, language (only when present),
/// and content. Line numbers are hashed as their decimal string form.
///
/// The visiting order, the field order, and the decimal encoding are
/// load-bearing: changing any of them would invalidate every hash that has
/// already been stored.
///
/// Returns the final state as 16 lowercase, zero-padded hexadecimal digits.
pub(crate) fn content_hash_for_chunks(chunks: &[Chunk]) -> String {
    let mut ordered = chunks.iter().collect::<Vec<&Chunk>>();
    // Lexicographic tuple comparison: path first, then start line, then end line.
    ordered.sort_by(|lhs, rhs| {
        let left_key = (&lhs.file_path, &lhs.start_line, &lhs.end_line);
        let right_key = (&rhs.file_path, &rhs.start_line, &rhs.end_line);
        left_key.cmp(&right_key)
    });

    let mut digest = FNV_OFFSET;
    for entry in &ordered {
        let first_line = entry.start_line.to_string();
        let last_line = entry.end_line.to_string();

        let mut absorb = |field: &[u8]| update_hash(&mut digest, field);
        absorb(entry.file_path.as_bytes());
        absorb(first_line.as_bytes());
        absorb(last_line.as_bytes());
        // A missing language contributes nothing at all (not even a
        // terminator); this must stay as-is to keep existing hashes valid.
        if let Some(language) = &entry.language {
            absorb(language.as_bytes());
        }
        absorb(entry.content.as_bytes());
    }
    format!("{digest:016x}")
}