geographdb-core 0.4.0

Geometric graph database core - 3D spatial indexing for code analysis
Documentation
//! Graph data adapter for sectioned storage
//!
//! Provides serialization/deserialization of graph data (nodes and edges)
//! to/from a GRAPH section in a sectioned file.

use anyhow::{Context, Result};

use super::data_structures::{EdgeRec, MetadataRec, NodeRec};
use super::sectioned::SectionedStorage;

/// Graph data container for serialization
///
/// Holds the three record collections that [`GraphData::to_bytes`] writes and
/// [`GraphData::from_bytes`] reads back, always in this order.
///
/// Format compatible with StorageManager file layout:
/// - Node count (u64 LE)
/// - Node records (N * `size_of::<NodeRec>()` bytes)
/// - Edge count (u64 LE)
/// - Edge records (M * `size_of::<EdgeRec>()` bytes)
/// - Metadata count (u64 LE)
/// - Metadata records (K * `size_of::<MetadataRec>()` bytes)
///
/// NOTE(review): earlier documentation quoted the record sizes as 72, 48 and
/// 176 bytes respectively. The code always uses `size_of`, so confirm those
/// figures against `data_structures` before relying on them.
#[derive(Debug, Clone, Default)]
pub struct GraphData {
    /// Node records, serialized in vector order.
    pub nodes: Vec<NodeRec>,
    /// Edge records, serialized in vector order.
    pub edges: Vec<EdgeRec>,
    /// Per-slot metadata. A `None` slot is serialized as
    /// `MetadataRec::default()`, and deserialization yields `Some(..)` for
    /// every slot, so `None` is not preserved across a round trip.
    pub metadata: Vec<Option<MetadataRec>>,
}

impl GraphData {
    /// Serialize graph data to bytes
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut bytes = Vec::new();

        // Node count
        let node_count: u64 = self.nodes.len() as u64;
        bytes.extend_from_slice(&node_count.to_le_bytes());

        // Node records
        for node in &self.nodes {
            bytes.extend_from_slice(bytemuck::bytes_of(node));
        }

        // Edge count
        let edge_count: u64 = self.edges.len() as u64;
        bytes.extend_from_slice(&edge_count.to_le_bytes());

        // Edge records
        for edge in &self.edges {
            bytes.extend_from_slice(bytemuck::bytes_of(edge));
        }

        // Metadata count
        let metadata_count: u64 = self.metadata.len() as u64;
        bytes.extend_from_slice(&metadata_count.to_le_bytes());

        // Metadata records
        for meta in &self.metadata {
            if let Some(m) = meta {
                bytes.extend_from_slice(bytemuck::bytes_of(m));
            } else {
                // Write zeroed metadata record
                bytes.extend_from_slice(bytemuck::bytes_of(&MetadataRec::default()));
            }
        }

        bytes
    }

    /// Deserialize graph data from bytes
    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
        if bytes.len() < 8 {
            return Err(anyhow::anyhow!("Graph data too short: {}", bytes.len()));
        }

        let mut pos = 0;

        // Read node count
        let node_count = u64::from_le_bytes(
            bytes[pos..pos + 8]
                .try_into()
                .context("Invalid node count bytes")?,
        ) as usize;
        pos += 8;

        // Validate we have enough data for nodes
        let node_bytes = node_count * std::mem::size_of::<NodeRec>();
        if pos + node_bytes > bytes.len() {
            return Err(anyhow::anyhow!(
                "Not enough data for nodes: need {}, have {}",
                pos + node_bytes,
                bytes.len()
            ));
        }

        // Read node records
        let mut nodes = Vec::with_capacity(node_count);
        for i in 0..node_count {
            let start = pos + i * std::mem::size_of::<NodeRec>();
            let end = start + std::mem::size_of::<NodeRec>();
            let node_rec: &NodeRec = bytemuck::try_from_bytes::<NodeRec>(&bytes[start..end])
                .map_err(|e| anyhow::anyhow!("Invalid node record: {}", e))?;
            nodes.push(*node_rec);
        }
        pos += node_bytes;

        // Read edge count
        if pos + 8 > bytes.len() {
            return Err(anyhow::anyhow!("Not enough data for edge count"));
        }
        let edge_count = u64::from_le_bytes(
            bytes[pos..pos + 8]
                .try_into()
                .context("Invalid edge count bytes")?,
        ) as usize;
        pos += 8;

        // Validate we have enough data for edges
        let edge_bytes = edge_count * std::mem::size_of::<EdgeRec>();
        if pos + edge_bytes > bytes.len() {
            return Err(anyhow::anyhow!(
                "Not enough data for edges: need {}, have {}",
                pos + edge_bytes,
                bytes.len()
            ));
        }

        // Read edge records
        let mut edges = Vec::with_capacity(edge_count);
        for i in 0..edge_count {
            let start = pos + i * std::mem::size_of::<EdgeRec>();
            let end = start + std::mem::size_of::<EdgeRec>();
            let edge_rec: &EdgeRec = bytemuck::try_from_bytes::<EdgeRec>(&bytes[start..end])
                .map_err(|e| anyhow::anyhow!("Invalid edge record: {}", e))?;
            edges.push(*edge_rec);
        }
        pos += edge_bytes;

        // Read metadata count
        if pos + 8 > bytes.len() {
            return Err(anyhow::anyhow!("Not enough data for metadata count"));
        }
        let metadata_count = u64::from_le_bytes(
            bytes[pos..pos + 8]
                .try_into()
                .context("Invalid metadata count bytes")?,
        ) as usize;
        pos += 8;

        // Read metadata records
        let mut metadata = Vec::with_capacity(metadata_count);
        for i in 0..metadata_count {
            let start = pos + i * std::mem::size_of::<MetadataRec>();
            let end = start + std::mem::size_of::<MetadataRec>();
            if end > bytes.len() {
                return Err(anyhow::anyhow!("Not enough data for metadata records"));
            }
            let meta_rec: &MetadataRec =
                bytemuck::try_from_bytes::<MetadataRec>(&bytes[start..end])
                    .map_err(|e| anyhow::anyhow!("Invalid metadata record: {}", e))?;
            metadata.push(Some(*meta_rec));
        }

        Ok(Self {
            nodes,
            edges,
            metadata,
        })
    }

    /// Calculate required capacity for storing this graph data
    pub fn required_capacity(&self) -> usize {
        8 // node count
            + self.nodes.len() * std::mem::size_of::<NodeRec>()
            + 8 // edge count
            + self.edges.len() * std::mem::size_of::<EdgeRec>()
            + 8 // metadata count
            + self.metadata.len() * std::mem::size_of::<MetadataRec>()
    }
}

/// Graph section adapter
///
/// Handles reading/writing graph data from/to a GRAPH section.
///
/// This is a stateless unit struct: all operations are associated functions
/// that take the `SectionedStorage` to operate on as an argument.
pub struct GraphSectionAdapter;

impl GraphSectionAdapter {
    /// Name of the section that holds the serialized graph.
    pub const SECTION_NAME: &'static str = "GRAPH";

    /// Minimum capacity (1 MiB) given to a newly created GRAPH section.
    const DEFAULT_CAPACITY: u64 = 1024 * 1024;

    /// Load graph data from the GRAPH section.
    ///
    /// # Errors
    ///
    /// Returns an error if the section cannot be read or if its contents do
    /// not parse as graph data.
    pub fn load(storage: &mut SectionedStorage) -> Result<GraphData> {
        let bytes = storage
            .read_section(Self::SECTION_NAME)
            .context("GRAPH section not found or empty")?;
        GraphData::from_bytes(&bytes).context("Failed to parse GRAPH section")
    }

    /// Save graph data to the GRAPH section.
    ///
    /// If the section does not exist it is created with at least
    /// 1 MiB of capacity (or twice the payload, whichever is larger). If it
    /// exists and the write fails with an error whose message mentions
    /// "overflow" or "capacity", the section is resized to the larger of twice
    /// its current capacity and twice the payload, and the write is retried
    /// once. Storage is flushed after a successful write.
    ///
    /// # Errors
    ///
    /// Returns any non-capacity write error unchanged, and any error from
    /// creating, resizing, retrying the write, or flushing.
    pub fn save(storage: &mut SectionedStorage, data: &GraphData) -> Result<()> {
        let bytes = data.to_bytes();
        let required = bytes.len() as u64;
        // Twice the payload leaves room for growth; saturate rather than wrap.
        let with_headroom = required.saturating_mul(2);

        if storage.get_section(Self::SECTION_NAME).is_some() {
            if let Err(e) = storage.write_section(Self::SECTION_NAME, &bytes) {
                // NOTE(review): capacity failures are recognised by message
                // text because the storage error type is not visible here;
                // a typed error would be more robust.
                let message = e.to_string();
                let is_capacity_error =
                    message.contains("overflow") || message.contains("capacity");
                if !is_capacity_error {
                    return Err(e);
                }

                // Re-read the capacity; report an error instead of panicking
                // if the section is unexpectedly gone.
                let current_capacity = storage
                    .get_section(Self::SECTION_NAME)
                    .map(|section| section.capacity)
                    .context("GRAPH section missing while resizing")?;
                let new_capacity = current_capacity.saturating_mul(2).max(with_headroom);

                storage
                    .resize_section(Self::SECTION_NAME, new_capacity)
                    .context("Failed to resize GRAPH section")?;
                // Retry write
                storage.write_section(Self::SECTION_NAME, &bytes)?;
            }
        } else {
            // Create new section with reasonable initial capacity (1MB)
            let section_capacity = Self::DEFAULT_CAPACITY.max(with_headroom);
            storage.create_section(Self::SECTION_NAME, section_capacity, 0)?;
            storage.write_section(Self::SECTION_NAME, &bytes)?;
        }

        storage.flush()?;
        Ok(())
    }

    /// Initialize an empty GRAPH section with reasonable default capacity.
    ///
    /// Creates the section with 1 MiB of capacity, writes an empty
    /// [`GraphData`] (three zero counts) into it, and flushes.
    ///
    /// # Errors
    ///
    /// Returns any error from creating the section, writing, or flushing.
    pub fn init(storage: &mut SectionedStorage) -> Result<()> {
        storage.create_section(Self::SECTION_NAME, Self::DEFAULT_CAPACITY, 0)?;

        // Write empty data so a later `load` finds a parseable section.
        let bytes = GraphData::default().to_bytes();
        storage.write_section(Self::SECTION_NAME, &bytes)?;
        storage.flush()?;

        Ok(())
    }

    /// Check if GRAPH section exists.
    pub fn exists(storage: &SectionedStorage) -> bool {
        storage.get_section(Self::SECTION_NAME).is_some()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use bytemuck::Zeroable;

    /// A small graph with a couple of records in every section.
    fn sample_graph() -> GraphData {
        GraphData {
            nodes: vec![NodeRec::zeroed(); 2],
            edges: vec![EdgeRec::zeroed(); 1],
            metadata: vec![Some(MetadataRec::default())],
        }
    }

    #[test]
    fn test_graph_data_serialization_roundtrip() {
        let nodes = vec![NodeRec {
            id: 1,
            morton_code: 42,
            x: 1.0,
            y: 2.0,
            z: 3.0,
            edge_off: 0,
            edge_len: 0,
            flags: 0,
            begin_ts: 0,
            end_ts: 0,
            tx_id: 0,
            visibility: 1,
            _padding: [0; 7],
        }];

        let edges = vec![EdgeRec {
            src: 1,
            dst: 2,
            w: 1.5,
            flags: 0,
            begin_ts: 0,
            end_ts: 0,
            tx_id: 0,
            visibility: 1,
            _padding: [0; 7],
        }];

        let metadata = vec![Some(MetadataRec::from_strings(
            "function", "return", 10, 20, 1, 0, 1, 10,
        ))];

        let original = GraphData {
            nodes,
            edges,
            metadata,
        };

        let bytes = original.to_bytes();
        let restored = GraphData::from_bytes(&bytes).unwrap();

        assert_eq!(restored.nodes.len(), original.nodes.len());
        assert_eq!(restored.nodes[0].id, 1);
        assert_eq!(restored.nodes[0].morton_code, 42);
        assert_eq!(restored.nodes[0].x, 1.0);
        assert_eq!(restored.edges.len(), original.edges.len());
        assert_eq!(restored.edges[0].src, 1);
        assert_eq!(restored.edges[0].dst, 2);
        assert_eq!(restored.edges[0].w, 1.5);
        assert_eq!(restored.metadata.len(), original.metadata.len());
        assert_eq!(
            restored.metadata[0].as_ref().unwrap().get_block_kind(),
            "function"
        );
    }

    #[test]
    fn test_empty_graph_data() {
        let empty = GraphData::default();
        let bytes = empty.to_bytes();
        // Exactly three u64 count prefixes and no records.
        assert_eq!(bytes.len(), 3 * 8);

        let restored = GraphData::from_bytes(&bytes).unwrap();
        assert_eq!(restored.nodes.len(), 0);
        assert_eq!(restored.edges.len(), 0);
        assert_eq!(restored.metadata.len(), 0);
    }

    #[test]
    fn test_required_capacity() {
        let data = GraphData {
            nodes: vec![NodeRec::zeroed(); 10],
            edges: vec![EdgeRec::zeroed(); 5],
            metadata: vec![Some(MetadataRec::default()); 10],
        };

        // Use actual struct sizes in case they change
        let expected = 8 // node count
            + 10 * std::mem::size_of::<NodeRec>()
            + 8 // edge count
            + 5 * std::mem::size_of::<EdgeRec>()
            + 8 // metadata count
            + 10 * std::mem::size_of::<MetadataRec>();
        assert_eq!(data.required_capacity(), expected);
    }

    #[test]
    fn test_serialized_length_matches_required_capacity() {
        let mut data = sample_graph();
        // `None` slots still occupy one full record on disk.
        data.metadata.push(None);

        assert_eq!(data.to_bytes().len(), data.required_capacity());
    }

    #[test]
    fn test_none_metadata_keeps_its_slot() {
        let data = GraphData {
            nodes: Vec::new(),
            edges: Vec::new(),
            metadata: vec![None],
        };

        let bytes = data.to_bytes();
        assert_eq!(bytes.len(), 3 * 8 + std::mem::size_of::<MetadataRec>());

        let restored = GraphData::from_bytes(&bytes).unwrap();
        assert_eq!(restored.metadata.len(), 1);
    }

    #[test]
    fn test_from_bytes_rejects_truncated_input() {
        // Too short to even hold the node count.
        assert!(GraphData::from_bytes(&[]).is_err());
        assert!(GraphData::from_bytes(&[0u8; 7]).is_err());

        let bytes = sample_graph().to_bytes();

        // Cut in the middle of the node records.
        assert!(GraphData::from_bytes(&bytes[..8 + 1]).is_err());
        // Cut one byte short of the final metadata record.
        assert!(GraphData::from_bytes(&bytes[..bytes.len() - 1]).is_err());
    }

    #[test]
    fn test_from_bytes_rejects_count_beyond_payload() {
        // Header claims 1000 nodes but no record bytes follow.
        let bytes = 1000u64.to_le_bytes();
        assert!(GraphData::from_bytes(&bytes).is_err());

        // Valid empty node and edge sections, then a metadata count with no records.
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&0u64.to_le_bytes());
        bytes.extend_from_slice(&0u64.to_le_bytes());
        bytes.extend_from_slice(&3u64.to_le_bytes());
        assert!(GraphData::from_bytes(&bytes).is_err());
    }
}