// faf_rust_sdk/binary/section.rs

//! FAFB Section Entry and Section Table
//!
//! The section table is located at the end of the file (at `section_table_offset`).
//! Each entry is 16 bytes and describes one section's location and metadata.
//! All multi-byte fields are stored little-endian.
//!
//! ## Section Entry Layout (16 bytes)
//!
//! ```text
//! Offset  Size  Field
//! ------  ----  -----
//! 0       1     section_type
//! 1       1     priority
//! 2       4     offset
//! 6       4     length
//! 10      2     token_count
//! 12      4     flags (low 2 bits: classification; remaining bits: section-specific)
//! ------  ----
//! Total: 16 bytes
//! ```
20
21use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
22use std::io::{Read, Write};
23
24use super::chunk_registry::{ChunkClassification, CLASSIFICATION_MASK};
25use super::error::{FafbError, FafbResult};
26use super::priority::Priority;
27use super::section_type::SectionType;
28
/// Size of a single serialized section entry in bytes.
///
/// This is the sum of the on-disk field widths written by `SectionEntry::write`:
/// 1 (section type) + 1 (priority) + 4 (offset) + 4 (length) + 2 (token count)
/// + 4 (flags).
pub const SECTION_ENTRY_SIZE: usize = 16;
31
/// A single section entry in the section table.
///
/// Describes where one section's data lives in the file, how important it is
/// when truncating, and how many tokens it is estimated to cost.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SectionEntry {
    /// Section type identifier
    pub section_type: SectionType,
    /// Truncation priority (0-255, higher = more important)
    pub priority: Priority,
    /// Byte offset to section data (from start of file)
    pub offset: u32,
    /// Section data length in bytes
    pub length: u32,
    /// Pre-computed token count estimate
    pub token_count: u16,
    /// Flags word (4 bytes for alignment). The bits selected by
    /// `CLASSIFICATION_MASK` (the low 2 bits, v2) hold the chunk classification;
    /// the remaining bits are section-specific.
    pub flags: u32,
}
48
49impl SectionEntry {
50    /// Create a new section entry with default priority
51    pub fn new(section_type: SectionType, offset: u32, length: u32) -> Self {
52        Self {
53            section_type,
54            priority: Priority::new(section_type.default_priority()),
55            offset,
56            length,
57            token_count: estimate_tokens(length),
58            flags: 0,
59        }
60    }
61
62    /// Create with explicit priority
63    pub fn with_priority(mut self, priority: Priority) -> Self {
64        self.priority = priority;
65        self
66    }
67
68    /// Create with explicit token count
69    pub fn with_token_count(mut self, count: u16) -> Self {
70        self.token_count = count;
71        self
72    }
73
74    /// Create with section-specific flags
75    pub fn with_flags(mut self, flags: u32) -> Self {
76        self.flags = flags;
77        self
78    }
79
80    /// Set classification in the low 2 bits of flags (v2)
81    pub fn with_classification(mut self, classification: ChunkClassification) -> Self {
82        // Clear low 2 bits, then set classification
83        self.flags = (self.flags & !CLASSIFICATION_MASK) | classification.bits();
84        self
85    }
86
87    /// Get the classification from the low 2 bits of flags (v2)
88    pub fn classification(&self) -> ChunkClassification {
89        ChunkClassification::from_bits(self.flags)
90    }
91
92    /// Get section-specific flags (bits 2+, excluding classification)
93    pub fn section_flags(&self) -> u32 {
94        self.flags & !CLASSIFICATION_MASK
95    }
96
97    /// Write entry to a byte buffer
98    pub fn write<W: Write>(&self, writer: &mut W) -> FafbResult<()> {
99        writer.write_u8(self.section_type.id())?;
100        writer.write_u8(self.priority.value())?;
101        writer.write_u32::<LittleEndian>(self.offset)?;
102        writer.write_u32::<LittleEndian>(self.length)?;
103        writer.write_u16::<LittleEndian>(self.token_count)?;
104        writer.write_u32::<LittleEndian>(self.flags)?;
105        Ok(())
106    }
107
108    /// Write entry to a new `Vec<u8>`
109    pub fn to_bytes(&self) -> FafbResult<Vec<u8>> {
110        let mut buf = Vec::with_capacity(SECTION_ENTRY_SIZE);
111        self.write(&mut buf)?;
112        Ok(buf)
113    }
114
115    /// Read entry from a byte buffer
116    pub fn read<R: Read>(reader: &mut R) -> FafbResult<Self> {
117        let section_type = SectionType::from(reader.read_u8()?);
118        let priority = Priority::from(reader.read_u8()?);
119        let offset = reader.read_u32::<LittleEndian>()?;
120        let length = reader.read_u32::<LittleEndian>()?;
121        let token_count = reader.read_u16::<LittleEndian>()?;
122        let flags = reader.read_u32::<LittleEndian>()?;
123
124        Ok(Self {
125            section_type,
126            priority,
127            offset,
128            length,
129            token_count,
130            flags,
131        })
132    }
133
134    /// Read entry from a byte slice
135    pub fn from_bytes(data: &[u8]) -> FafbResult<Self> {
136        if data.len() < SECTION_ENTRY_SIZE {
137            return Err(FafbError::FileTooSmall {
138                expected: SECTION_ENTRY_SIZE,
139                actual: data.len(),
140            });
141        }
142        let mut cursor = std::io::Cursor::new(data);
143        Self::read(&mut cursor)
144    }
145
146    /// Check if this section's data range is valid within a file
147    pub fn validate_bounds(&self, file_size: u32) -> FafbResult<()> {
148        // WHY: checked_add prevents integer overflow attacks where offset + length wraps
149        // around u32::MAX to produce a small "end" that passes the bounds check
150        // Example attack: offset=0xFFFFFF00, length=0x200 would wrap to 0x100
151        let end =
152            self.offset
153                .checked_add(self.length)
154                .ok_or(FafbError::InvalidSectionTableOffset {
155                    offset: self.offset,
156                    file_size,
157                })?;
158
159        // WHY: Bounds check prevents reading past file end - memory safety
160        if end > file_size {
161            return Err(FafbError::InvalidSectionTableOffset {
162                offset: self.offset,
163                file_size,
164            });
165        }
166
167        Ok(())
168    }
169}
170
/// The section table containing all section entries.
///
/// Entries are kept in insertion order (the order they were pushed or read).
#[derive(Debug, Clone, Default)]
pub struct SectionTable {
    // Private so that access goes through the methods on `SectionTable`.
    entries: Vec<SectionEntry>,
}
176
177impl SectionTable {
178    /// Create an empty section table
179    pub fn new() -> Self {
180        Self {
181            entries: Vec::new(),
182        }
183    }
184
185    /// Create with pre-allocated capacity
186    pub fn with_capacity(capacity: usize) -> Self {
187        Self {
188            entries: Vec::with_capacity(capacity),
189        }
190    }
191
192    /// Add a section entry
193    pub fn push(&mut self, entry: SectionEntry) {
194        self.entries.push(entry);
195    }
196
197    /// Get number of sections
198    pub fn len(&self) -> usize {
199        self.entries.len()
200    }
201
202    /// Check if table is empty
203    pub fn is_empty(&self) -> bool {
204        self.entries.is_empty()
205    }
206
207    /// Get entry by index
208    pub fn get(&self, index: usize) -> Option<&SectionEntry> {
209        self.entries.get(index)
210    }
211
212    /// Get entry by section type
213    pub fn get_by_type(&self, section_type: SectionType) -> Option<&SectionEntry> {
214        self.entries.iter().find(|e| e.section_type == section_type)
215    }
216
217    /// Get all entries
218    pub fn entries(&self) -> &[SectionEntry] {
219        &self.entries
220    }
221
222    /// Get entries sorted by priority (highest first)
223    pub fn entries_by_priority(&self) -> Vec<&SectionEntry> {
224        let mut sorted: Vec<_> = self.entries.iter().collect();
225        sorted.sort_by(|a, b| b.priority.cmp(&a.priority));
226        sorted
227    }
228
229    /// Get entries that fit within a token budget
230    pub fn entries_within_budget(&self, budget: u16) -> Vec<&SectionEntry> {
231        // WHY: Priority-first traversal ensures highest-value sections get budget first
232        // This is a greedy algorithm - optimal for most use cases where priorities
233        // accurately reflect importance
234        let mut result = Vec::new();
235        let mut remaining = budget;
236
237        for entry in self.entries_by_priority() {
238            if entry.token_count <= remaining {
239                result.push(entry);
240                remaining -= entry.token_count;
241            } else if entry.priority.is_critical() {
242                // WHY: Critical sections always included - they define project identity
243                // (e.g., project name, version) and are small enough to never skip
244                result.push(entry);
245            }
246            // WHY: Non-critical sections that don't fit are silently dropped
247            // This enables graceful degradation under tight token budgets
248        }
249
250        result
251    }
252
253    /// Calculate total token count
254    pub fn total_tokens(&self) -> u32 {
255        self.entries.iter().map(|e| e.token_count as u32).sum()
256    }
257
258    /// Calculate total size in bytes (for section table only)
259    pub fn table_size(&self) -> usize {
260        self.entries.len() * SECTION_ENTRY_SIZE
261    }
262
263    /// Write section table to a byte buffer
264    pub fn write<W: Write>(&self, writer: &mut W) -> FafbResult<()> {
265        for entry in &self.entries {
266            entry.write(writer)?;
267        }
268        Ok(())
269    }
270
271    /// Write section table to a new `Vec<u8>`
272    pub fn to_bytes(&self) -> FafbResult<Vec<u8>> {
273        let mut buf = Vec::with_capacity(self.table_size());
274        self.write(&mut buf)?;
275        Ok(buf)
276    }
277
278    /// Read section table from a byte buffer
279    pub fn read<R: Read>(reader: &mut R, count: usize) -> FafbResult<Self> {
280        let mut entries = Vec::with_capacity(count);
281        for _ in 0..count {
282            entries.push(SectionEntry::read(reader)?);
283        }
284        Ok(Self { entries })
285    }
286
287    /// Read section table from a byte slice
288    pub fn from_bytes(data: &[u8], count: usize) -> FafbResult<Self> {
289        let expected_size = count * SECTION_ENTRY_SIZE;
290        if data.len() < expected_size {
291            return Err(FafbError::FileTooSmall {
292                expected: expected_size,
293                actual: data.len(),
294            });
295        }
296        let mut cursor = std::io::Cursor::new(data);
297        Self::read(&mut cursor, count)
298    }
299
300    /// Validate all entries against file size
301    pub fn validate_bounds(&self, file_size: u32) -> FafbResult<()> {
302        for entry in &self.entries {
303            entry.validate_bounds(file_size)?;
304        }
305        Ok(())
306    }
307}
308
309impl IntoIterator for SectionTable {
310    type Item = SectionEntry;
311    type IntoIter = std::vec::IntoIter<SectionEntry>;
312
313    fn into_iter(self) -> Self::IntoIter {
314        self.entries.into_iter()
315    }
316}
317
318impl<'a> IntoIterator for &'a SectionTable {
319    type Item = &'a SectionEntry;
320    type IntoIter = std::slice::Iter<'a, SectionEntry>;
321
322    fn into_iter(self) -> Self::IntoIter {
323        self.entries.iter()
324    }
325}
326
/// Estimate a token count from a byte length.
///
/// Divides `byte_length` by 4 (rounding down) and clamps the result to
/// `u16::MAX`.
fn estimate_tokens(byte_length: u32) -> u16 {
    // WHY: ~4 bytes per token is the rough rule of thumb used here for English
    // text; other content (code, YAML) may tokenize somewhat differently, so
    // this is only an estimate.
    let approx_tokens = byte_length / 4;

    // WHY: the on-disk token_count field is a u16, so anything larger clamps to
    // u16::MAX (reached at byte lengths of 262_140, roughly 256KB, and above).
    if approx_tokens > u32::from(u16::MAX) {
        u16::MAX
    } else {
        approx_tokens as u16
    }
}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// A serialized entry occupies exactly `SECTION_ENTRY_SIZE` (16) bytes.
    #[test]
    fn test_section_entry_size() {
        let encoded = SectionEntry::new(SectionType::Meta, 32, 100)
            .to_bytes()
            .unwrap();

        assert_eq!(encoded.len(), SECTION_ENTRY_SIZE);
        assert_eq!(encoded.len(), 16);
    }

    /// Every field survives a serialize/deserialize round trip.
    #[test]
    fn test_section_entry_roundtrip() {
        let written = SectionEntry::new(SectionType::TechStack, 64, 256)
            .with_priority(Priority::high())
            .with_token_count(100)
            .with_flags(0xDEADBEEF);

        let encoded = written.to_bytes().unwrap();
        let read_back = SectionEntry::from_bytes(&encoded).unwrap();

        assert_eq!(written.section_type, read_back.section_type);
        assert_eq!(written.priority, read_back.priority);
        assert_eq!(written.offset, read_back.offset);
        assert_eq!(written.length, read_back.length);
        assert_eq!(written.token_count, read_back.token_count);
        assert_eq!(written.flags, read_back.flags);
    }

    /// `SectionEntry::new` picks the default priority of the section type.
    #[test]
    fn test_section_entry_default_priority() {
        // Critical
        assert_eq!(
            SectionEntry::new(SectionType::Meta, 0, 100).priority.value(),
            255
        );

        // High
        assert_eq!(
            SectionEntry::new(SectionType::TechStack, 0, 100)
                .priority
                .value(),
            200
        );

        // Low
        assert_eq!(
            SectionEntry::new(SectionType::Context, 0, 100)
                .priority
                .value(),
            64
        );
    }

    /// The estimate is the byte length divided by four.
    #[test]
    fn test_token_estimation() {
        let cases: [(u32, u16); 4] = [(0, 0), (4, 1), (100, 25), (1000, 250)];
        for &(byte_length, expected) in cases.iter() {
            assert_eq!(estimate_tokens(byte_length), expected);
        }
    }

    /// Very large sections clamp to `u16::MAX`.
    #[test]
    fn test_token_estimation_cap() {
        assert_eq!(estimate_tokens(u32::MAX), u16::MAX);
    }

    /// A new table has no entries and a zero serialized size.
    #[test]
    fn test_section_table_empty() {
        let empty = SectionTable::new();

        assert!(empty.is_empty());
        assert_eq!(empty.len(), 0);
        assert_eq!(empty.table_size(), 0);
    }

    /// Pushing entries grows the length and the serialized size.
    #[test]
    fn test_section_table_push() {
        let mut table = SectionTable::new();
        for entry in vec![
            SectionEntry::new(SectionType::Meta, 32, 100),
            SectionEntry::new(SectionType::TechStack, 132, 200),
        ] {
            table.push(entry);
        }

        assert_eq!(table.len(), 2);
        assert_eq!(table.table_size(), 32);
    }

    /// A table survives a serialize/deserialize round trip.
    #[test]
    fn test_section_table_roundtrip() {
        let mut written = SectionTable::new();
        written.push(SectionEntry::new(SectionType::Meta, 32, 100));
        written.push(SectionEntry::new(SectionType::TechStack, 132, 200));
        written.push(SectionEntry::new(SectionType::KeyFiles, 332, 500));

        let encoded = written.to_bytes().unwrap();
        assert_eq!(encoded.len(), 48); // 3 × 16 bytes

        let read_back = SectionTable::from_bytes(&encoded, 3).unwrap();
        assert_eq!(read_back.len(), 3);

        for (index, expected) in written.entries().iter().enumerate() {
            let actual = &read_back.entries()[index];
            assert_eq!(expected.section_type, actual.section_type);
            assert_eq!(expected.offset, actual.offset);
            assert_eq!(expected.length, actual.length);
        }
    }

    /// Lookup by type finds present sections and reports absent ones as `None`.
    #[test]
    fn test_section_table_get_by_type() {
        let mut table = SectionTable::new();
        table.push(SectionEntry::new(SectionType::Meta, 32, 100));
        table.push(SectionEntry::new(SectionType::TechStack, 132, 200));

        let found = table.get_by_type(SectionType::Meta);
        assert!(found.is_some());
        assert_eq!(found.unwrap().offset, 32);

        assert!(table.get_by_type(SectionType::KeyFiles).is_none());
    }

    /// `entries_by_priority` orders entries from highest to lowest priority.
    #[test]
    fn test_section_table_priority_sorting() {
        let mut table = SectionTable::new();
        let low = SectionEntry::new(SectionType::Context, 0, 100).with_priority(Priority::low());
        let critical =
            SectionEntry::new(SectionType::Meta, 0, 100).with_priority(Priority::critical());
        let high =
            SectionEntry::new(SectionType::TechStack, 0, 100).with_priority(Priority::high());
        table.push(low);
        table.push(critical);
        table.push(high);

        let ordered = table.entries_by_priority();
        assert_eq!(ordered[0].section_type, SectionType::Meta); // Critical first
        assert_eq!(ordered[1].section_type, SectionType::TechStack); // High second
        assert_eq!(ordered[2].section_type, SectionType::Context); // Low last
    }

    /// Budget selection keeps what fits and always keeps critical sections.
    #[test]
    fn test_section_table_budget() {
        let meta = SectionEntry::new(SectionType::Meta, 0, 100)
            .with_priority(Priority::critical())
            .with_token_count(50);
        let tech_stack = SectionEntry::new(SectionType::TechStack, 0, 200)
            .with_priority(Priority::high())
            .with_token_count(100);
        let context = SectionEntry::new(SectionType::Context, 0, 1000)
            .with_priority(Priority::low())
            .with_token_count(500);

        let mut table = SectionTable::new();
        table.push(meta);
        table.push(tech_stack);
        table.push(context);

        // Budget of 200 should include Meta (50) and TechStack (100)
        let selected = table.entries_within_budget(200);
        assert_eq!(selected.len(), 2);

        // Meta should always be included (critical)
        let has_meta = selected
            .iter()
            .any(|e| e.section_type == SectionType::Meta);
        assert!(has_meta);
    }

    /// `total_tokens` adds up the token counts of all entries.
    #[test]
    fn test_section_table_total_tokens() {
        let mut table = SectionTable::new();
        table.push(SectionEntry::new(SectionType::Meta, 0, 100).with_token_count(50));
        table.push(SectionEntry::new(SectionType::TechStack, 0, 200).with_token_count(100));

        assert_eq!(table.total_tokens(), 150);
    }

    /// Bounds validation accepts ranges inside the file and rejects ranges past it.
    #[test]
    fn test_section_entry_validate_bounds() {
        let entry = SectionEntry::new(SectionType::Meta, 100, 50);

        // Valid: offset 100, length 50, file size 200
        let inside = entry.validate_bounds(200);
        assert!(inside.is_ok());

        // Invalid: offset 100, length 50 = end 150, but file only 100
        let outside = entry.validate_bounds(100);
        assert!(outside.is_err());
    }

    /// Unknown section type ids are carried through a round trip unchanged.
    #[test]
    fn test_unknown_section_type_preserved() {
        let unknown = SectionEntry {
            section_type: SectionType::Unknown(0x99),
            priority: Priority::medium(),
            offset: 0,
            length: 100,
            token_count: 25,
            flags: 0,
        };

        let encoded = unknown.to_bytes().unwrap();
        let read_back = SectionEntry::from_bytes(&encoded).unwrap();

        assert!(matches!(read_back.section_type, SectionType::Unknown(0x99)));
    }
}
// End of faf_rust_sdk/binary/section.rs