Skip to main content

faf_rust_sdk/binary/
section.rs

1//! FAFB Section Entry and Section Table
2//!
3//! The section table is located at the end of the file (at section_table_offset).
4//! Each entry is 16 bytes and describes one section's location and metadata.
5//!
6//! ## Section Entry Layout (16 bytes)
7//!
8//! ```text
9//! Offset  Size  Field
10//! ------  ----  -----
11//! 0       1     section_type
12//! 1       1     priority
13//! 2       4     offset
14//! 6       4     length
15//! 10      2     token_count
16//! 12      4     flags (section-specific)
17//! ------  ----
18//! Total: 16 bytes
19//! ```
20
21use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
22use std::io::{Read, Write};
23
24use super::error::{FafbError, FafbResult};
25use super::priority::Priority;
26use super::section_type::SectionType;
27
/// Size in bytes of one serialized section entry.
///
/// Spelled out as the sum of the on-disk field widths:
/// section_type (1) + priority (1) + offset (4) + length (4)
/// + token_count (2) + flags (4).
pub const SECTION_ENTRY_SIZE: usize = 1 + 1 + 4 + 4 + 2 + 4;
30
31/// A single section entry in the section table
32#[derive(Debug, Clone, PartialEq, Eq)]
33pub struct SectionEntry {
34    /// Section type identifier
35    pub section_type: SectionType,
36    /// Truncation priority (0-255, higher = more important)
37    pub priority: Priority,
38    /// Byte offset to section data (from start of file)
39    pub offset: u32,
40    /// Section data length in bytes
41    pub length: u32,
42    /// Pre-computed token count estimate
43    pub token_count: u16,
44    /// Section-specific flags (4 bytes for alignment)
45    pub flags: u32,
46}
47
48impl SectionEntry {
49    /// Create a new section entry with default priority
50    pub fn new(section_type: SectionType, offset: u32, length: u32) -> Self {
51        Self {
52            section_type,
53            priority: Priority::new(section_type.default_priority()),
54            offset,
55            length,
56            token_count: estimate_tokens(length),
57            flags: 0,
58        }
59    }
60
61    /// Create with explicit priority
62    pub fn with_priority(mut self, priority: Priority) -> Self {
63        self.priority = priority;
64        self
65    }
66
67    /// Create with explicit token count
68    pub fn with_token_count(mut self, count: u16) -> Self {
69        self.token_count = count;
70        self
71    }
72
73    /// Create with section-specific flags
74    pub fn with_flags(mut self, flags: u32) -> Self {
75        self.flags = flags;
76        self
77    }
78
79    /// Write entry to a byte buffer
80    pub fn write<W: Write>(&self, writer: &mut W) -> FafbResult<()> {
81        writer.write_u8(self.section_type.id())?;
82        writer.write_u8(self.priority.value())?;
83        writer.write_u32::<LittleEndian>(self.offset)?;
84        writer.write_u32::<LittleEndian>(self.length)?;
85        writer.write_u16::<LittleEndian>(self.token_count)?;
86        writer.write_u32::<LittleEndian>(self.flags)?;
87        Ok(())
88    }
89
90    /// Write entry to a new `Vec<u8>`
91    pub fn to_bytes(&self) -> FafbResult<Vec<u8>> {
92        let mut buf = Vec::with_capacity(SECTION_ENTRY_SIZE);
93        self.write(&mut buf)?;
94        Ok(buf)
95    }
96
97    /// Read entry from a byte buffer
98    pub fn read<R: Read>(reader: &mut R) -> FafbResult<Self> {
99        let section_type = SectionType::from(reader.read_u8()?);
100        let priority = Priority::from(reader.read_u8()?);
101        let offset = reader.read_u32::<LittleEndian>()?;
102        let length = reader.read_u32::<LittleEndian>()?;
103        let token_count = reader.read_u16::<LittleEndian>()?;
104        let flags = reader.read_u32::<LittleEndian>()?;
105
106        Ok(Self {
107            section_type,
108            priority,
109            offset,
110            length,
111            token_count,
112            flags,
113        })
114    }
115
116    /// Read entry from a byte slice
117    pub fn from_bytes(data: &[u8]) -> FafbResult<Self> {
118        if data.len() < SECTION_ENTRY_SIZE {
119            return Err(FafbError::FileTooSmall {
120                expected: SECTION_ENTRY_SIZE,
121                actual: data.len(),
122            });
123        }
124        let mut cursor = std::io::Cursor::new(data);
125        Self::read(&mut cursor)
126    }
127
128    /// Check if this section's data range is valid within a file
129    pub fn validate_bounds(&self, file_size: u32) -> FafbResult<()> {
130        // WHY: checked_add prevents integer overflow attacks where offset + length wraps
131        // around u32::MAX to produce a small "end" that passes the bounds check
132        // Example attack: offset=0xFFFFFF00, length=0x200 would wrap to 0x100
133        let end =
134            self.offset
135                .checked_add(self.length)
136                .ok_or(FafbError::InvalidSectionTableOffset {
137                    offset: self.offset,
138                    file_size,
139                })?;
140
141        // WHY: Bounds check prevents reading past file end - memory safety
142        if end > file_size {
143            return Err(FafbError::InvalidSectionTableOffset {
144                offset: self.offset,
145                file_size,
146            });
147        }
148
149        Ok(())
150    }
151}
152
153/// The section table containing all section entries
154#[derive(Debug, Clone, Default)]
155pub struct SectionTable {
156    entries: Vec<SectionEntry>,
157}
158
159impl SectionTable {
160    /// Create an empty section table
161    pub fn new() -> Self {
162        Self {
163            entries: Vec::new(),
164        }
165    }
166
167    /// Create with pre-allocated capacity
168    pub fn with_capacity(capacity: usize) -> Self {
169        Self {
170            entries: Vec::with_capacity(capacity),
171        }
172    }
173
174    /// Add a section entry
175    pub fn push(&mut self, entry: SectionEntry) {
176        self.entries.push(entry);
177    }
178
179    /// Get number of sections
180    pub fn len(&self) -> usize {
181        self.entries.len()
182    }
183
184    /// Check if table is empty
185    pub fn is_empty(&self) -> bool {
186        self.entries.is_empty()
187    }
188
189    /// Get entry by index
190    pub fn get(&self, index: usize) -> Option<&SectionEntry> {
191        self.entries.get(index)
192    }
193
194    /// Get entry by section type
195    pub fn get_by_type(&self, section_type: SectionType) -> Option<&SectionEntry> {
196        self.entries.iter().find(|e| e.section_type == section_type)
197    }
198
199    /// Get all entries
200    pub fn entries(&self) -> &[SectionEntry] {
201        &self.entries
202    }
203
204    /// Get entries sorted by priority (highest first)
205    pub fn entries_by_priority(&self) -> Vec<&SectionEntry> {
206        let mut sorted: Vec<_> = self.entries.iter().collect();
207        sorted.sort_by(|a, b| b.priority.cmp(&a.priority));
208        sorted
209    }
210
211    /// Get entries that fit within a token budget
212    pub fn entries_within_budget(&self, budget: u16) -> Vec<&SectionEntry> {
213        // WHY: Priority-first traversal ensures highest-value sections get budget first
214        // This is a greedy algorithm - optimal for most use cases where priorities
215        // accurately reflect importance
216        let mut result = Vec::new();
217        let mut remaining = budget;
218
219        for entry in self.entries_by_priority() {
220            if entry.token_count <= remaining {
221                result.push(entry);
222                remaining -= entry.token_count;
223            } else if entry.priority.is_critical() {
224                // WHY: Critical sections always included - they define project identity
225                // (e.g., project name, version) and are small enough to never skip
226                result.push(entry);
227            }
228            // WHY: Non-critical sections that don't fit are silently dropped
229            // This enables graceful degradation under tight token budgets
230        }
231
232        result
233    }
234
235    /// Calculate total token count
236    pub fn total_tokens(&self) -> u32 {
237        self.entries.iter().map(|e| e.token_count as u32).sum()
238    }
239
240    /// Calculate total size in bytes (for section table only)
241    pub fn table_size(&self) -> usize {
242        self.entries.len() * SECTION_ENTRY_SIZE
243    }
244
245    /// Write section table to a byte buffer
246    pub fn write<W: Write>(&self, writer: &mut W) -> FafbResult<()> {
247        for entry in &self.entries {
248            entry.write(writer)?;
249        }
250        Ok(())
251    }
252
253    /// Write section table to a new `Vec<u8>`
254    pub fn to_bytes(&self) -> FafbResult<Vec<u8>> {
255        let mut buf = Vec::with_capacity(self.table_size());
256        self.write(&mut buf)?;
257        Ok(buf)
258    }
259
260    /// Read section table from a byte buffer
261    pub fn read<R: Read>(reader: &mut R, count: usize) -> FafbResult<Self> {
262        let mut entries = Vec::with_capacity(count);
263        for _ in 0..count {
264            entries.push(SectionEntry::read(reader)?);
265        }
266        Ok(Self { entries })
267    }
268
269    /// Read section table from a byte slice
270    pub fn from_bytes(data: &[u8], count: usize) -> FafbResult<Self> {
271        let expected_size = count * SECTION_ENTRY_SIZE;
272        if data.len() < expected_size {
273            return Err(FafbError::FileTooSmall {
274                expected: expected_size,
275                actual: data.len(),
276            });
277        }
278        let mut cursor = std::io::Cursor::new(data);
279        Self::read(&mut cursor, count)
280    }
281
282    /// Validate all entries against file size
283    pub fn validate_bounds(&self, file_size: u32) -> FafbResult<()> {
284        for entry in &self.entries {
285            entry.validate_bounds(file_size)?;
286        }
287        Ok(())
288    }
289}
290
291impl IntoIterator for SectionTable {
292    type Item = SectionEntry;
293    type IntoIter = std::vec::IntoIter<SectionEntry>;
294
295    fn into_iter(self) -> Self::IntoIter {
296        self.entries.into_iter()
297    }
298}
299
300impl<'a> IntoIterator for &'a SectionTable {
301    type Item = &'a SectionEntry;
302    type IntoIter = std::slice::Iter<'a, SectionEntry>;
303
304    fn into_iter(self) -> Self::IntoIter {
305        self.entries.iter()
306    }
307}
308
/// Approximate the number of tokens occupied by `byte_length` bytes of data.
///
/// Applies the rough rule of ~4 bytes per token for English text, rounding
/// down, and saturates at `u16::MAX`.
fn estimate_tokens(byte_length: u32) -> u16 {
    // WHY: 4 bytes/token is taken as the average for English prose under BPE
    // tokenizers; code is assumed to run denser (~3-3.5 bytes/token) and YAML
    // sparser (~4-5 bytes/token).
    let approx = byte_length / 4;

    // WHY: the result must fit in a u16, so anything at or beyond the cap
    // (sections of roughly 256KB and up) reports the maximum token count.
    // That is acceptable because sections that large will likely be truncated
    // anyway.
    if approx >= u32::from(u16::MAX) {
        u16::MAX
    } else {
        approx as u16
    }
}
318
#[cfg(test)]
mod tests {
    //! Unit tests for section entry/table serialization, priority ordering,
    //! token budgeting and bounds validation.

    use super::*;

    /// A serialized entry is exactly `SECTION_ENTRY_SIZE` (16) bytes.
    #[test]
    fn test_section_entry_size() {
        let entry = SectionEntry::new(SectionType::Meta, 32, 100);
        let bytes = entry.to_bytes().unwrap();
        assert_eq!(bytes.len(), SECTION_ENTRY_SIZE);
        assert_eq!(bytes.len(), 16);
    }

    /// Every field of an entry survives a write/read round trip.
    #[test]
    fn test_section_entry_roundtrip() {
        let original = SectionEntry::new(SectionType::TechStack, 64, 256)
            .with_priority(Priority::high())
            .with_token_count(100)
            .with_flags(0xDEADBEEF);

        let bytes = original.to_bytes().unwrap();
        let recovered = SectionEntry::from_bytes(&bytes).unwrap();

        assert_eq!(original.section_type, recovered.section_type);
        assert_eq!(original.priority, recovered.priority);
        assert_eq!(original.offset, recovered.offset);
        assert_eq!(original.length, recovered.length);
        assert_eq!(original.token_count, recovered.token_count);
        assert_eq!(original.flags, recovered.flags);
        assert_eq!(original, recovered);
    }

    /// Inputs shorter than the required size are rejected instead of read.
    #[test]
    fn test_from_bytes_rejects_short_input() {
        let short_entry = [0u8; SECTION_ENTRY_SIZE - 1];
        assert!(SectionEntry::from_bytes(&short_entry).is_err());
        assert!(SectionEntry::from_bytes(&[]).is_err());

        // One entry's worth of bytes cannot hold two entries.
        let one_entry = [0u8; SECTION_ENTRY_SIZE];
        assert!(SectionTable::from_bytes(&one_entry, 2).is_err());

        let mut cursor = std::io::Cursor::new(&one_entry[..]);
        assert!(SectionTable::read(&mut cursor, 2).is_err());
    }

    #[test]
    fn test_section_entry_default_priority() {
        let meta = SectionEntry::new(SectionType::Meta, 0, 100);
        assert_eq!(meta.priority.value(), 255); // Critical

        let tech = SectionEntry::new(SectionType::TechStack, 0, 100);
        assert_eq!(tech.priority.value(), 200); // High

        let context = SectionEntry::new(SectionType::Context, 0, 100);
        assert_eq!(context.priority.value(), 64); // Low
    }

    #[test]
    fn test_token_estimation() {
        assert_eq!(estimate_tokens(0), 0);
        assert_eq!(estimate_tokens(4), 1);
        assert_eq!(estimate_tokens(100), 25);
        assert_eq!(estimate_tokens(1000), 250);
    }

    #[test]
    fn test_token_estimation_cap() {
        // Should cap at u16::MAX
        let huge = estimate_tokens(u32::MAX);
        assert_eq!(huge, u16::MAX);
    }

    #[test]
    fn test_section_table_empty() {
        let table = SectionTable::new();
        assert!(table.is_empty());
        assert_eq!(table.len(), 0);
        assert_eq!(table.table_size(), 0);
    }

    #[test]
    fn test_section_table_push() {
        let mut table = SectionTable::new();
        table.push(SectionEntry::new(SectionType::Meta, 32, 100));
        table.push(SectionEntry::new(SectionType::TechStack, 132, 200));

        assert_eq!(table.len(), 2);
        assert_eq!(table.table_size(), 32);
    }

    /// A whole table survives a write/read round trip, field by field.
    #[test]
    fn test_section_table_roundtrip() {
        let mut original = SectionTable::new();
        original.push(SectionEntry::new(SectionType::Meta, 32, 100));
        original.push(SectionEntry::new(SectionType::TechStack, 132, 200).with_flags(7));
        original.push(SectionEntry::new(SectionType::KeyFiles, 332, 500).with_token_count(42));

        let bytes = original.to_bytes().unwrap();
        assert_eq!(bytes.len(), 48); // 3 × 16 bytes

        let recovered = SectionTable::from_bytes(&bytes, 3).unwrap();
        assert_eq!(recovered.len(), 3);

        for (orig, recv) in original.entries().iter().zip(recovered.entries().iter()) {
            assert_eq!(orig.section_type, recv.section_type);
            assert_eq!(orig.priority.value(), recv.priority.value());
            assert_eq!(orig.offset, recv.offset);
            assert_eq!(orig.length, recv.length);
            assert_eq!(orig.token_count, recv.token_count);
            assert_eq!(orig.flags, recv.flags);
        }
    }

    #[test]
    fn test_section_table_get_by_type() {
        let mut table = SectionTable::new();
        table.push(SectionEntry::new(SectionType::Meta, 32, 100));
        table.push(SectionEntry::new(SectionType::TechStack, 132, 200));

        let meta = table.get_by_type(SectionType::Meta);
        assert!(meta.is_some());
        assert_eq!(meta.unwrap().offset, 32);

        let missing = table.get_by_type(SectionType::KeyFiles);
        assert!(missing.is_none());
    }

    #[test]
    fn test_section_table_priority_sorting() {
        let mut table = SectionTable::new();
        table.push(SectionEntry::new(SectionType::Context, 0, 100).with_priority(Priority::low()));
        table
            .push(SectionEntry::new(SectionType::Meta, 0, 100).with_priority(Priority::critical()));
        table.push(
            SectionEntry::new(SectionType::TechStack, 0, 100).with_priority(Priority::high()),
        );

        let sorted = table.entries_by_priority();
        assert_eq!(sorted[0].section_type, SectionType::Meta); // Critical first
        assert_eq!(sorted[1].section_type, SectionType::TechStack); // High second
        assert_eq!(sorted[2].section_type, SectionType::Context); // Low last
    }

    #[test]
    fn test_section_table_budget() {
        let mut table = SectionTable::new();
        table.push(
            SectionEntry::new(SectionType::Meta, 0, 100)
                .with_priority(Priority::critical())
                .with_token_count(50),
        );
        table.push(
            SectionEntry::new(SectionType::TechStack, 0, 200)
                .with_priority(Priority::high())
                .with_token_count(100),
        );
        table.push(
            SectionEntry::new(SectionType::Context, 0, 1000)
                .with_priority(Priority::low())
                .with_token_count(500),
        );

        // Budget of 200 should include Meta (50) and TechStack (100)
        let within_budget = table.entries_within_budget(200);
        assert_eq!(within_budget.len(), 2);

        // Meta should always be included (critical)
        assert!(within_budget
            .iter()
            .any(|e| e.section_type == SectionType::Meta));
    }

    /// A critical section is kept even when it alone exceeds the budget,
    /// while a non-critical section that does not fit is dropped.
    #[test]
    fn test_section_table_budget_keeps_critical_over_budget() {
        let mut table = SectionTable::new();
        table.push(
            SectionEntry::new(SectionType::Meta, 0, 100)
                .with_priority(Priority::critical())
                .with_token_count(50),
        );
        table.push(
            SectionEntry::new(SectionType::TechStack, 0, 200)
                .with_priority(Priority::high())
                .with_token_count(100),
        );

        let within_budget = table.entries_within_budget(10);
        assert_eq!(within_budget.len(), 1);
        assert_eq!(within_budget[0].section_type, SectionType::Meta);
    }

    #[test]
    fn test_section_table_total_tokens() {
        let mut table = SectionTable::new();
        table.push(SectionEntry::new(SectionType::Meta, 0, 100).with_token_count(50));
        table.push(SectionEntry::new(SectionType::TechStack, 0, 200).with_token_count(100));

        assert_eq!(table.total_tokens(), 150);
    }

    #[test]
    fn test_section_entry_validate_bounds() {
        let entry = SectionEntry::new(SectionType::Meta, 100, 50);

        // Valid: offset 100, length 50, file size 200
        assert!(entry.validate_bounds(200).is_ok());

        // Valid: the section ends exactly at the end of the file
        assert!(entry.validate_bounds(150).is_ok());

        // Invalid: one byte short of the section's end
        assert!(entry.validate_bounds(149).is_err());

        // Invalid: offset 100, length 50 = end 150, but file only 100
        assert!(entry.validate_bounds(100).is_err());
    }

    /// `offset + length` wrapping past `u32::MAX` must be rejected, even
    /// against the largest possible file size.
    #[test]
    fn test_section_entry_validate_bounds_overflow() {
        let entry = SectionEntry::new(SectionType::Meta, 0xFFFF_FF00, 0x200);
        assert!(entry.validate_bounds(u32::MAX).is_err());

        let mut table = SectionTable::new();
        table.push(SectionEntry::new(SectionType::Meta, 0, 16));
        table.push(entry);
        assert!(table.validate_bounds(u32::MAX).is_err());
    }

    #[test]
    fn test_unknown_section_type_preserved() {
        let entry = SectionEntry {
            section_type: SectionType::Unknown(0x99),
            priority: Priority::medium(),
            offset: 0,
            length: 100,
            token_count: 25,
            flags: 0,
        };

        let bytes = entry.to_bytes().unwrap();
        let recovered = SectionEntry::from_bytes(&bytes).unwrap();

        assert!(matches!(recovered.section_type, SectionType::Unknown(0x99)));
    }
}