Skip to main content

geographdb_core/storage/
sectioned_cfg.rs

//! CFG data adapter for sectioned storage
//!
//! Provides serialization/deserialization of CFG data (blocks and edges)
//! to/from a CFG section in a sectioned file.
5
6use super::sectioned::SectionedStorage;
7use anyhow::{Context, Result};
8
/// A single control-flow-graph edge.
///
/// [`CfgData::to_bytes`] encodes an edge as `src_id` (`u64`), `dst_id` (`u64`)
/// and `edge_type` (`u32`), each little-endian, in that order.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CfgEdge {
    /// Identifier of the edge's source endpoint (presumably a
    /// [`SerializableCfgBlock::id`] -- confirm against the producer).
    pub src_id: u64,
    /// Identifier of the edge's destination endpoint (presumably a
    /// [`SerializableCfgBlock::id`] -- confirm against the producer).
    pub dst_id: u64,
    /// Numeric tag for the kind of edge; its value space is defined by the
    /// producer and is not visible in this module.
    pub edge_type: u32,
}
16
/// One CFG block in the shape that [`CfgData::to_bytes`] writes and
/// [`CfgData::from_bytes`] reads back.
///
/// Only the binary encoding is defined in this module; the field descriptions
/// below that go beyond the encoding are inferred from the field names and
/// should be confirmed against the code that produces these blocks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SerializableCfgBlock {
    /// Identifier of this block.
    pub id: u64,
    /// Identifier of the owning function (signed; presumably a database row id).
    pub function_id: i64,
    /// Free-form label for the kind of block; stored as a length-prefixed string.
    pub block_kind: String,
    /// Free-form label for the block's terminator; stored as a length-prefixed string.
    pub terminator: String,
    /// Start of the block's byte span (presumably an offset into the source file).
    pub byte_start: u64,
    /// End of the block's byte span (inclusive/exclusive not established here).
    pub byte_end: u64,
    /// Line on which the block starts.
    pub start_line: u64,
    /// Column at which the block starts.
    pub start_col: u64,
    /// Line on which the block ends.
    pub end_line: u64,
    /// Column at which the block ends.
    pub end_col: u64,
    /// Depth value related to dominators (presumably depth in the dominator tree).
    pub dominator_depth: u32,
    /// Loop nesting level of the block.
    pub loop_nesting: u32,
    /// Number of branches associated with the block.
    pub branch_count: u32,
    /// Outgoing edge indices (presumably into [`CfgData::edges`]). Each index
    /// is written to disk as a `u32`.
    pub out_edges: Vec<usize>,
    /// Optional hash string for the block.
    pub cfg_hash: Option<String>,
    /// Optional list of statement strings; stored on disk as a JSON array.
    pub statements: Option<Vec<String>>,
}
37
38#[derive(Debug, Clone, Default)]
39pub struct CfgData {
40    pub blocks: Vec<SerializableCfgBlock>,
41    pub edges: Vec<CfgEdge>,
42}
43
44impl CfgData {
45    pub fn to_bytes(&self) -> Vec<u8> {
46        let mut bytes = Vec::new();
47        let block_count: u64 = self.blocks.len() as u64;
48        bytes.extend_from_slice(&block_count.to_le_bytes());
49
50        for block in &self.blocks {
51            bytes.extend_from_slice(&block.id.to_le_bytes());
52            bytes.extend_from_slice(&block.function_id.to_le_bytes());
53            bytes.extend_from_slice(&block.byte_start.to_le_bytes());
54            bytes.extend_from_slice(&block.byte_end.to_le_bytes());
55            bytes.extend_from_slice(&block.start_line.to_le_bytes());
56            bytes.extend_from_slice(&block.start_col.to_le_bytes());
57            bytes.extend_from_slice(&block.end_line.to_le_bytes());
58            bytes.extend_from_slice(&block.end_col.to_le_bytes());
59            bytes.extend_from_slice(&block.dominator_depth.to_le_bytes());
60            bytes.extend_from_slice(&block.loop_nesting.to_le_bytes());
61            bytes.extend_from_slice(&block.branch_count.to_le_bytes());
62            bytes.extend_from_slice(&(block.out_edges.len() as u32).to_le_bytes());
63
64            let write_str = |b: &mut Vec<u8>, s: &str| {
65                let sb = s.as_bytes();
66                let slen = sb.len().min(65535) as u16;
67                b.extend_from_slice(&slen.to_le_bytes());
68                b.extend_from_slice(&sb[..slen as usize]);
69            };
70
71            write_str(&mut bytes, &block.block_kind);
72            write_str(&mut bytes, &block.terminator);
73
74            for &edge_idx in &block.out_edges {
75                bytes.extend_from_slice(&(edge_idx as u32).to_le_bytes());
76            }
77
78            match &block.cfg_hash {
79                Some(h) => {
80                    bytes.push(1);
81                    write_str(&mut bytes, h);
82                }
83                None => {
84                    bytes.push(0);
85                }
86            }
87
88            match &block.statements {
89                Some(s) => {
90                    bytes.push(1);
91                    let json = serde_json::to_string(s).unwrap_or_default();
92                    let jb = json.as_bytes();
93                    bytes.extend_from_slice(&(jb.len() as u32).to_le_bytes());
94                    bytes.extend_from_slice(jb);
95                }
96                None => {
97                    bytes.push(0);
98                }
99            }
100        }
101
102        let edge_count: u64 = self.edges.len() as u64;
103        bytes.extend_from_slice(&edge_count.to_le_bytes());
104        for edge in &self.edges {
105            bytes.extend_from_slice(&edge.src_id.to_le_bytes());
106            bytes.extend_from_slice(&edge.dst_id.to_le_bytes());
107            bytes.extend_from_slice(&edge.edge_type.to_le_bytes());
108        }
109        bytes
110    }
111
112    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
113        if bytes.len() < 8 {
114            return Err(anyhow::anyhow!("CFG data too short"));
115        }
116        let mut pos = 0;
117        let block_count = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?) as usize;
118        pos += 8;
119        let mut blocks = Vec::with_capacity(block_count);
120
121        for _ in 0..block_count {
122            let id = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
123            pos += 8;
124            let function_id = i64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
125            pos += 8;
126            let byte_start = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
127            pos += 8;
128            let byte_end = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
129            pos += 8;
130            let start_line = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
131            pos += 8;
132            let start_col = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
133            pos += 8;
134            let end_line = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
135            pos += 8;
136            let end_col = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
137            pos += 8;
138            let dominator_depth = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?);
139            pos += 4;
140            let loop_nesting = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?);
141            pos += 4;
142            let branch_count = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?);
143            pos += 4;
144            let out_edge_count = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?) as usize;
145            pos += 4;
146
147            let read_str = |p: &mut usize| -> Result<String> {
148                let slen = u16::from_le_bytes(bytes[*p..*p + 2].try_into()?) as usize;
149                *p += 2;
150                let s = String::from_utf8_lossy(&bytes[*p..*p + slen]).to_string();
151                *p += slen;
152                Ok(s)
153            };
154
155            let block_kind = read_str(&mut pos)?;
156            let terminator = read_str(&mut pos)?;
157
158            let mut out_edges = Vec::with_capacity(out_edge_count);
159            for _ in 0..out_edge_count {
160                out_edges.push(u32::from_le_bytes(bytes[pos..pos + 4].try_into()?) as usize);
161                pos += 4;
162            }
163
164            let cfg_hash = if bytes[pos] == 1 {
165                pos += 1;
166                Some(read_str(&mut pos)?)
167            } else {
168                pos += 1;
169                None
170            };
171            let statements = if bytes[pos] == 1 {
172                pos += 1;
173                let jlen = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?) as usize;
174                pos += 4;
175                let s = serde_json::from_str(&String::from_utf8_lossy(&bytes[pos..pos + jlen]))?;
176                pos += jlen;
177                Some(s)
178            } else {
179                pos += 1;
180                None
181            };
182
183            blocks.push(SerializableCfgBlock {
184                id,
185                function_id,
186                block_kind,
187                terminator,
188                byte_start,
189                byte_end,
190                start_line,
191                start_col,
192                end_line,
193                end_col,
194                dominator_depth,
195                loop_nesting,
196                branch_count,
197                out_edges,
198                cfg_hash,
199                statements,
200            });
201        }
202
203        let edge_count = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?) as usize;
204        pos += 8;
205        let mut edges = Vec::with_capacity(edge_count);
206        for _ in 0..edge_count {
207            let src_id = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
208            pos += 8;
209            let dst_id = u64::from_le_bytes(bytes[pos..pos + 8].try_into()?);
210            pos += 8;
211            let edge_type = u32::from_le_bytes(bytes[pos..pos + 4].try_into()?);
212            pos += 4;
213            edges.push(CfgEdge {
214                src_id,
215                dst_id,
216                edge_type,
217            });
218        }
219        Ok(CfgData { blocks, edges })
220    }
221
222    pub fn required_capacity(&self) -> usize {
223        let mut size = 8;
224        for block in &self.blocks {
225            size += 80
226                + 2
227                + block.block_kind.len()
228                + 2
229                + block.terminator.len()
230                + 4 * block.out_edges.len();
231            size += 1 + block.cfg_hash.as_ref().map(|h| 2 + h.len()).unwrap_or(0);
232            size += 1 + block
233                .statements
234                .as_ref()
235                .map(|s| 4 + serde_json::to_string(s).unwrap_or_default().len())
236                .unwrap_or(0);
237        }
238        size += 8 + 20 * self.edges.len();
239        size
240    }
241}
242
243pub struct CfgSectionAdapter;
244impl CfgSectionAdapter {
245    pub const SECTION_NAME: &'static str = "CFG";
246    pub fn load(storage: &mut SectionedStorage) -> Result<CfgData> {
247        let bytes = storage
248            .read_section(Self::SECTION_NAME)
249            .context("CFG section missing")?;
250        CfgData::from_bytes(&bytes)
251    }
252    pub fn save(storage: &mut SectionedStorage, data: &CfgData) -> Result<()> {
253        let bytes = data.to_bytes();
254        let required = bytes.len() as u64;
255
256        if storage.get_section(Self::SECTION_NAME).is_some() {
257            let result = storage.write_section(Self::SECTION_NAME, &bytes);
258
259            if let Err(e) = result {
260                if e.to_string().contains("overflow") || e.to_string().contains("capacity") {
261                    let current = storage.get_section(Self::SECTION_NAME).unwrap();
262                    let new_capacity = (current.capacity * 2).max(required * 2);
263                    storage
264                        .resize_section(Self::SECTION_NAME, new_capacity)
265                        .context("Failed to resize CFG section")?;
266                    storage.write_section(Self::SECTION_NAME, &bytes)?;
267                } else {
268                    return Err(e);
269                }
270            }
271        } else {
272            let section_capacity = (1024 * 1024).max(required * 2);
273            storage.create_section(Self::SECTION_NAME, section_capacity, 0)?;
274            storage.write_section(Self::SECTION_NAME, &bytes)?;
275        }
276        storage.flush()?;
277        Ok(())
278    }
279    pub fn init(storage: &mut SectionedStorage) -> Result<()> {
280        let default_capacity = 1024 * 1024; // 1MB
281        storage.create_section(Self::SECTION_NAME, default_capacity, 0)?;
282        let empty = CfgData::default();
283        let bytes = empty.to_bytes();
284        storage.write_section(Self::SECTION_NAME, &bytes)?;
285        storage.flush()?;
286        Ok(())
287    }
288    pub fn exists(storage: &SectionedStorage) -> bool {
289        storage.get_section(Self::SECTION_NAME).is_some()
290    }
291}