index-transformer 1.0.0

Typestate transformer and orthogonal instruction set for Index.
Documentation
//! Transformed document cache primitives.

use std::collections::HashMap;

use index_core::IndexDocument;

/// Current transformer cache format version.
///
/// [`TransformCacheKey::new`] copies this string into every key it builds, so a key
/// built under a different value never compares equal to one built under this value.
pub const TRANSFORMER_VERSION: &str = "transformer-v1";

/// Current adapter registry cache format version.
///
/// [`TransformCacheKey::new`] copies this string into every key it builds, so a key
/// built under a different value never compares equal to one built under this value.
pub const ADAPTER_REGISTRY_VERSION: &str = "adapters-v1";

/// Stable key for transformed document cache entries.
///
/// Equality and hashing are derived over all four fields, so two keys match only when
/// the source URL, the content hash, and both version strings are identical.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TransformCacheKey {
    /// Source URL supplied when the key was built, if any.
    source_url: Option<String>,
    /// Hex digest of the source content, as produced by [`stable_hash_hex`].
    content_hash: String,
    /// Adapter registry format version; set from [`ADAPTER_REGISTRY_VERSION`] by `new`.
    adapter_version: String,
    /// Transformer format version; set from [`TRANSFORMER_VERSION`] by `new`.
    transformer_version: String,
}

impl TransformCacheKey {
    /// Creates the key for a document with the given `source_url` and source `content`.
    ///
    /// The content itself is not retained: only its [`stable_hash_hex`] digest is stored,
    /// alongside the current [`ADAPTER_REGISTRY_VERSION`] and [`TRANSFORMER_VERSION`].
    #[must_use]
    pub fn new(source_url: Option<&str>, content: &str) -> Self {
        let content_hash = stable_hash_hex(content);
        let source_url = source_url.map(String::from);

        Self {
            source_url,
            content_hash,
            adapter_version: String::from(ADAPTER_REGISTRY_VERSION),
            transformer_version: String::from(TRANSFORMER_VERSION),
        }
    }

    /// Borrows the source URL this key was built with, or `None` if it had none.
    #[must_use]
    pub fn source_url(&self) -> Option<&str> {
        self.source_url.as_deref()
    }

    /// Borrows the hex digest of the source content.
    #[must_use]
    pub fn content_hash(&self) -> &str {
        self.content_hash.as_str()
    }
}

/// In-memory transformed document cache for repeated browsing workflows.
///
/// The derived `Default` yields an empty cache, and the derived `Clone` copies every
/// stored entry.
#[derive(Debug, Clone, Default)]
pub struct TransformedDocumentCache {
    /// Transformed documents, looked up by their cache key.
    entries: HashMap<TransformCacheKey, IndexDocument>,
}

impl TransformedDocumentCache {
    /// Creates an empty transformed document cache.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the cached document for a matching key.
    #[must_use]
    pub fn get(&self, key: &TransformCacheKey) -> Option<IndexDocument> {
        self.entries.get(key).cloned()
    }

    /// Stores a transformed document.
    pub fn insert(&mut self, key: TransformCacheKey, document: IndexDocument) {
        self.entries.insert(key, document);
    }

    /// Returns the number of transformed documents currently retained.
    #[must_use]
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Returns whether the cache is empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
}

/// Hashes `content` with 64-bit FNV-1a and renders the digest as hex.
///
/// The input is consumed as its UTF-8 bytes. The result is always 16 lowercase
/// hexadecimal digits, zero padded, and depends only on `content`.
#[must_use]
pub fn stable_hash_hex(content: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: XOR the byte in first, then multiply by the prime (wrapping on overflow).
    let digest = content.bytes().fold(FNV_OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });

    format!("{digest:016x}")
}

#[cfg(test)]
mod tests {
    use index_core::{IndexDocument, IndexNode};

    use super::{TransformCacheKey, TransformedDocumentCache, stable_hash_hex};

    /// Builds a titled document containing a single "cached" paragraph.
    fn document(title: &str) -> IndexDocument {
        let mut built = IndexDocument::titled(title);
        built.push(IndexNode::Paragraph(String::from("cached")));
        built
    }

    /// A stored document can be fetched back with an equal key.
    #[test]
    fn cache_roundtrips_transformed_documents() {
        let mut cache = TransformedDocumentCache::new();
        let key = TransformCacheKey::new(Some("https://example.org"), "<p>hello</p>");
        cache.insert(key.clone(), document("Cached"));

        let fetched = cache.get(&key);

        assert_eq!(cache.len(), 1);
        assert!(matches!(fetched, Some(found) if found.title == "Cached"));
    }

    /// Changing either the content or the URL yields a different key.
    #[test]
    fn cache_key_invalidates_on_source_content_or_url_change() {
        let url_a = Some("https://example.org/a");
        let url_b = Some("https://example.org/b");

        let base = TransformCacheKey::new(url_a, "<p>hello</p>");
        let changed_content = TransformCacheKey::new(url_a, "<p>bye</p>");
        let changed_url = TransformCacheKey::new(url_b, "<p>hello</p>");

        assert_ne!(base, changed_content);
        assert_ne!(base, changed_url);
    }

    /// Changing either version component yields a different key.
    #[test]
    fn cache_key_invalidates_on_adapter_or_transformer_version_change() {
        let base = TransformCacheKey::new(Some("https://example.org"), "content");

        // Each variant differs from `base` in exactly one version field.
        let changed_adapter = TransformCacheKey {
            adapter_version: "adapters-v-next".to_owned(),
            ..base.clone()
        };
        let changed_transformer = TransformCacheKey {
            transformer_version: "transformer-v-next".to_owned(),
            ..base.clone()
        };

        assert_ne!(base, changed_adapter);
        assert_ne!(base, changed_transformer);
    }

    /// Equal inputs hash equally; inputs differing only in case do not.
    #[test]
    fn stable_hash_is_deterministic() {
        let first = stable_hash_hex("Index");
        let second = stable_hash_hex("Index");
        let lowercase = stable_hash_hex("index");

        assert_eq!(first, second);
        assert_ne!(first, lowercase);
    }
}