heddle-semantic 0.8.0

An AI-native version control system
Documentation
// SPDX-License-Identifier: Apache-2.0
//! Helper utilities for semantic diff.

use std::{cell::RefCell, collections::HashMap, path::Path};

use objects::{
    object::{Blob, ContentHash, Tree},
    store::ObjectStore,
};

pub(super) struct TreeBlobContentLoader<'a, S: ObjectStore + ?Sized> {
    store: &'a S,
    root_hash: ContentHash,
    trees: RefCell<HashMap<ContentHash, Option<Tree>>>,
}

impl<'a, S: ObjectStore + ?Sized> TreeBlobContentLoader<'a, S> {
    pub(super) fn new(store: &'a S, root_hash: ContentHash) -> Self {
        Self {
            store,
            root_hash,
            trees: RefCell::new(HashMap::new()),
        }
    }

    pub(super) fn load_content(&self, path: &Path) -> Result<Option<String>, anyhow::Error> {
        let Some(root) = self.get_tree(&self.root_hash)? else {
            return Ok(None);
        };
        let Some(blob) = self.get_blob_at_path(&root, &path.display().to_string())? else {
            return Ok(None);
        };
        Ok(blob.content_str().map(ToOwned::to_owned))
    }

    #[cfg(test)]
    fn cached_tree_count(&self) -> usize {
        self.trees.borrow().len()
    }

    fn get_tree(&self, hash: &ContentHash) -> Result<Option<Tree>, anyhow::Error> {
        if let Some(tree) = self.trees.borrow().get(hash).cloned() {
            return Ok(tree);
        }

        let tree = self.store.get_tree(hash)?;
        self.trees.borrow_mut().insert(*hash, tree.clone());
        Ok(tree)
    }

    fn get_blob_at_path(&self, tree: &Tree, path: &str) -> Result<Option<Blob>, anyhow::Error> {
        let parts: Vec<&str> = path.split('/').collect();
        self.get_blob_recursive(tree, &parts)
    }

    fn get_blob_recursive(
        &self,
        tree: &Tree,
        parts: &[&str],
    ) -> Result<Option<Blob>, anyhow::Error> {
        if parts.is_empty() {
            return Ok(None);
        }

        let name = parts[0];
        let entry = match tree.get(name) {
            Some(e) => e,
            None => return Ok(None),
        };

        if parts.len() == 1 {
            if let Some(blob_hash) = entry.blob_hash() {
                return Ok(self.store.get_blob(&blob_hash)?);
            }
        } else if let Some(tree_hash) = entry.tree_hash()
            && let Some(subtree) = self.get_tree(&tree_hash)?
        {
            return self.get_blob_recursive(&subtree, &parts[1..]);
        }

        Ok(None)
    }
}

#[cfg(test)]
mod tests {
    use objects::{
        object::{Blob, EntryType, TreeEntry},
        store::{InMemoryStore, ObjectStore},
    };

    use super::*;

    fn put_blob(store: &InMemoryStore, content: &str) -> ContentHash {
        store.put_blob(&Blob::from(content)).unwrap()
    }

    fn put_tree(
        store: &InMemoryStore,
        entries: Vec<(&str, ContentHash, EntryType)>,
    ) -> ContentHash {
        let tree = Tree::from_entries(
            entries
                .into_iter()
                .map(|(name, hash, entry_type)| match entry_type {
                    EntryType::Blob => TreeEntry::file(name.to_string(), hash, false),
                    EntryType::Tree => TreeEntry::directory(name.to_string(), hash),
                    EntryType::Symlink => TreeEntry::symlink(name.to_string(), hash),
                    EntryType::Gitlink => unreachable!("semantic helper does not build gitlinks"),
                    EntryType::Spoollink => {
                        unreachable!("semantic helper does not build spoollinks")
                    }
                })
                .collect::<Result<Vec<_>, _>>()
                .unwrap(),
        );
        store.put_tree(&tree).unwrap()
    }

    #[test]
    fn tree_blob_loader_reuses_cached_subtrees_for_sibling_paths() {
        let store = InMemoryStore::new();
        let nested = put_tree(
            &store,
            vec![
                (
                    "first.rs",
                    put_blob(&store, "fn first() {}\n"),
                    EntryType::Blob,
                ),
                (
                    "second.rs",
                    put_blob(&store, "fn second() {}\n"),
                    EntryType::Blob,
                ),
            ],
        );
        let root = put_tree(&store, vec![("src", nested, EntryType::Tree)]);
        let loader = TreeBlobContentLoader::new(&store, root);

        assert_eq!(
            loader
                .load_content(Path::new("src/first.rs"))
                .unwrap()
                .as_deref(),
            Some("fn first() {}\n")
        );
        assert_eq!(
            loader
                .load_content(Path::new("src/second.rs"))
                .unwrap()
                .as_deref(),
            Some("fn second() {}\n")
        );
        assert_eq!(
            loader.cached_tree_count(),
            2,
            "loader should cache the root and shared src subtree instead of resolving them per path"
        );
    }
}