geographdb-core 0.3.1

Geometric graph database core - 3D spatial indexing for code analysis
Documentation
//! CFG data adapter for sectioned storage
//!
//! Provides serialization/deserialization of CFG data (blocks and edges)
//! to/from a CFG section in a sectioned file.

use super::sectioned::SectionedStorage;
use anyhow::{Context, Result};

/// CFG edge representation
///
/// `CfgData::to_bytes` encodes each edge as 20 bytes: `src_id` and `dst_id`
/// as little-endian `u64` values followed by `edge_type` as a little-endian
/// `u32`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CfgEdge {
    /// Identifier of the edge's source.
    /// NOTE(review): presumably a `SerializableCfgBlock::id` — confirm with callers.
    pub src_id: u64,
    /// Identifier of the edge's destination.
    /// NOTE(review): presumably a `SerializableCfgBlock::id` — confirm with callers.
    pub dst_id: u64,
    /// Numeric edge kind; the meaning of individual values is not defined in this file.
    pub edge_type: u32,
}

/// CFG block for serialization
///
/// Every field is written by `CfgData::to_bytes` and restored by
/// `CfgData::from_bytes`. Numeric fields are stored little-endian at their
/// declared width; the notes on individual fields describe the cases where
/// the stored form can differ from the in-memory value.
#[derive(Debug, Clone)]
pub struct SerializableCfgBlock {
    /// Block identifier.
    pub id: u64,
    /// Identifier of the owning function (signed; the meaning of negative
    /// values is not defined in this file).
    pub function_id: i64,
    /// Block kind label. Stored with a `u16` length prefix, so anything past
    /// 65535 bytes is cut off when serialized.
    pub block_kind: String,
    /// Terminator label. Stored with a `u16` length prefix, so anything past
    /// 65535 bytes is cut off when serialized.
    pub terminator: String,
    /// Start offset of the block (presumably a byte offset into the source — confirm).
    pub byte_start: u64,
    /// End offset of the block (presumably a byte offset into the source — confirm).
    pub byte_end: u64,
    /// Line on which the block starts.
    pub start_line: u64,
    /// Column at which the block starts.
    pub start_col: u64,
    /// Line on which the block ends.
    pub end_line: u64,
    /// Column at which the block ends.
    pub end_col: u64,
    /// Dominator depth metric for the block.
    pub dominator_depth: u32,
    /// Loop nesting metric for the block.
    pub loop_nesting: u32,
    /// Branch count metric for the block.
    pub branch_count: u32,
    /// Outgoing edge indices. Each entry is stored as a `u32`, so values that
    /// do not fit in 32 bits are truncated when serialized.
    /// NOTE(review): presumably indices into `CfgData::edges` — confirm with callers.
    pub out_edges: Vec<usize>,
    /// Optional hash string. Stored as a presence byte followed, when present,
    /// by a `u16`-length-prefixed string (same 65535-byte limit as above).
    pub cfg_hash: Option<String>,
    /// Optional statement list. Stored as a presence byte followed, when
    /// present, by a `u32` length and the list encoded as JSON text.
    pub statements: Option<Vec<String>>,
}

/// In-memory contents of a CFG section: a list of blocks and a list of edges.
///
/// The `Default` value has no blocks and no edges. Use `to_bytes` and
/// `from_bytes` to convert to and from the binary section payload.
#[derive(Debug, Clone, Default)]
pub struct CfgData {
    /// All serialized CFG blocks, in storage order.
    pub blocks: Vec<SerializableCfgBlock>,
    /// All CFG edges, in storage order.
    pub edges: Vec<CfgEdge>,
}

impl CfgData {
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut bytes = Vec::new();
        let block_count: u64 = self.blocks.len() as u64;
        bytes.extend_from_slice(&block_count.to_le_bytes());

        for block in &self.blocks {
            bytes.extend_from_slice(&block.id.to_le_bytes());
            bytes.extend_from_slice(&block.function_id.to_le_bytes());
            bytes.extend_from_slice(&block.byte_start.to_le_bytes());
            bytes.extend_from_slice(&block.byte_end.to_le_bytes());
            bytes.extend_from_slice(&block.start_line.to_le_bytes());
            bytes.extend_from_slice(&block.start_col.to_le_bytes());
            bytes.extend_from_slice(&block.end_line.to_le_bytes());
            bytes.extend_from_slice(&block.end_col.to_le_bytes());
            bytes.extend_from_slice(&block.dominator_depth.to_le_bytes());
            bytes.extend_from_slice(&block.loop_nesting.to_le_bytes());
            bytes.extend_from_slice(&block.branch_count.to_le_bytes());
            bytes.extend_from_slice(&(block.out_edges.len() as u32).to_le_bytes());

            let write_str = |b: &mut Vec<u8>, s: &str| {
                let sb = s.as_bytes();
                let slen = sb.len().min(65535) as u16;
                b.extend_from_slice(&slen.to_le_bytes());
                b.extend_from_slice(&sb[..slen as usize]);
            };

            write_str(&mut bytes, &block.block_kind);
            write_str(&mut bytes, &block.terminator);

            for &edge_idx in &block.out_edges {
                bytes.extend_from_slice(&(edge_idx as u32).to_le_bytes());
            }

            match &block.cfg_hash {
                Some(h) => {
                    bytes.push(1);
                    write_str(&mut bytes, h);
                }
                None => {
                    bytes.push(0);
                }
            }

            match &block.statements {
                Some(s) => {
                    bytes.push(1);
                    let json = serde_json::to_string(s).unwrap_or_default();
                    let jb = json.as_bytes();
                    bytes.extend_from_slice(&(jb.len() as u32).to_le_bytes());
                    bytes.extend_from_slice(jb);
                }
                None => {
                    bytes.push(0);
                }
            }
        }

        let edge_count: u64 = self.edges.len() as u64;
        bytes.extend_from_slice(&edge_count.to_le_bytes());
        for edge in &self.edges {
            bytes.extend_from_slice(&edge.src_id.to_le_bytes());
            bytes.extend_from_slice(&edge.dst_id.to_le_bytes());
            bytes.extend_from_slice(&edge.edge_type.to_le_bytes());
        }
        bytes
    }

    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
        if bytes.len() < 8 {
            return Err(anyhow::anyhow!("CFG data too short"));
        }
        let mut pos = 0;
        let block_count = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?) as usize;
        pos += 8;
        let mut blocks = Vec::with_capacity(block_count);

        for _ in 0..block_count {
            let id = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let function_id = i64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let byte_start = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let byte_end = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let start_line = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let start_col = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let end_line = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let end_col = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let dominator_depth = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?);
            pos += 4;
            let loop_nesting = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?);
            pos += 4;
            let branch_count = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?);
            pos += 4;
            let out_edge_count = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?) as usize;
            pos += 4;

            let read_str = |p: &mut usize| -> Result<String> {
                let slen = u16::from_le_bytes(bytes[*p..*p + 2].try_into()?) as usize;
                *p += 2;
                let s = String::from_utf8_lossy(&bytes[*p..*p + slen]).to_string();
                *p += slen;
                Ok(s)
            };

            let block_kind = read_str(&mut pos)?;
            let terminator = read_str(&mut pos)?;

            let mut out_edges = Vec::with_capacity(out_edge_count);
            for _ in 0..out_edge_count {
                out_edges.push(u32::from_le_bytes(bytes[pos..pos + 4].try_into()?) as usize);
                pos += 4;
            }

            let cfg_hash = if bytes[pos] == 1 {
                pos += 1;
                Some(read_str(&mut pos)?)
            } else {
                pos += 1;
                None
            };
            let statements = if bytes[pos] == 1 {
                pos += 1;
                let jlen = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?) as usize;
                pos += 4;
                let s = serde_json::from_str(&String::from_utf8_lossy(&bytes[pos..pos + jlen]))?;
                pos += jlen;
                Some(s)
            } else {
                pos += 1;
                None
            };

            blocks.push(SerializableCfgBlock {
                id,
                function_id,
                block_kind,
                terminator,
                byte_start,
                byte_end,
                start_line,
                start_col,
                end_line,
                end_col,
                dominator_depth,
                loop_nesting,
                branch_count,
                out_edges,
                cfg_hash,
                statements,
            });
        }

        let edge_count = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?) as usize;
        pos += 8;
        let mut edges = Vec::with_capacity(edge_count);
        for _ in 0..edge_count {
            let src_id = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let dst_id = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
            pos += 8;
            let edge_type = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?);
            pos += 4;
            edges.push(CfgEdge {
                src_id,
                dst_id,
                edge_type,
            });
        }
        Ok(CfgData { blocks, edges })
    }

    pub fn required_capacity(&self) -> usize {
        let mut size = 8;
        for block in &self.blocks {
            size += 80
                + 2
                + block.block_kind.len()
                + 2
                + block.terminator.len()
                + 4 * block.out_edges.len();
            size += 1 + block.cfg_hash.as_ref().map(|h| 2 + h.len()).unwrap_or(0);
            size += 1 + block
                .statements
                .as_ref()
                .map(|s| 4 + serde_json::to_string(s).unwrap_or_default().len())
                .unwrap_or(0);
        }
        size += 8 + 20 * self.edges.len();
        size
    }
}

/// Stateless helper that reads and writes [`CfgData`] through the `"CFG"`
/// section of a [`SectionedStorage`].
pub struct CfgSectionAdapter;
impl CfgSectionAdapter {
    /// Name of the section that holds the serialized CFG data.
    pub const SECTION_NAME: &'static str = "CFG";

    /// Reads the CFG section and decodes it.
    ///
    /// # Errors
    ///
    /// Fails with the context `"CFG section missing"` when the section cannot
    /// be read, or with the decoder's error when the payload is malformed.
    pub fn load(storage: &mut SectionedStorage) -> Result<CfgData> {
        storage
            .read_section(Self::SECTION_NAME)
            .context("CFG section missing")
            .and_then(|raw| CfgData::from_bytes(&raw))
    }

    /// Serializes `data` into the CFG section and flushes the storage.
    ///
    /// * If the section does not exist yet it is created with a capacity of
    ///   1 MiB or twice the payload size, whichever is larger.
    /// * If the section exists and the write fails with a message containing
    ///   `"overflow"` or `"capacity"`, the section is resized to the larger of
    ///   twice its current capacity and twice the payload size, and the write
    ///   is retried once. Any other write error is returned unchanged.
    pub fn save(storage: &mut SectionedStorage, data: &CfgData) -> Result<()> {
        let payload = data.to_bytes();
        let needed = payload.len() as u64;

        if storage.get_section(Self::SECTION_NAME).is_none() {
            // First save: allocate generously so later growth rarely resizes.
            let initial_capacity = (1024 * 1024).max(needed * 2);
            storage.create_section(Self::SECTION_NAME, initial_capacity, 0)?;
            storage.write_section(Self::SECTION_NAME, &payload)?;
        } else {
            let first_attempt = storage.write_section(Self::SECTION_NAME, &payload);

            if let Err(err) = first_attempt {
                let message = err.to_string();
                let out_of_space = message.contains("overflow") || message.contains("capacity");
                if !out_of_space {
                    return Err(err);
                }

                let section = storage.get_section(Self::SECTION_NAME).unwrap();
                let grown_capacity = (section.capacity * 2).max(needed * 2);
                storage
                    .resize_section(Self::SECTION_NAME, grown_capacity)
                    .context("Failed to resize CFG section")?;
                storage.write_section(Self::SECTION_NAME, &payload)?;
            }
        }

        storage.flush()?;
        Ok(())
    }

    /// Creates the CFG section with a 1 MiB capacity, writes an empty
    /// [`CfgData`] into it and flushes the storage.
    pub fn init(storage: &mut SectionedStorage) -> Result<()> {
        let one_mib = 1024 * 1024; // 1MB
        storage.create_section(Self::SECTION_NAME, one_mib, 0)?;

        let empty_payload = CfgData::default().to_bytes();
        storage.write_section(Self::SECTION_NAME, &empty_payload)?;

        storage.flush()?;
        Ok(())
    }

    /// Reports whether the storage already contains a CFG section.
    pub fn exists(storage: &SectionedStorage) -> bool {
        let section = storage.get_section(Self::SECTION_NAME);
        section.is_some()
    }
}