Skip to main content

faf_rust_sdk/binary/
compile.rs

1//! Compile/decompile API for .faf ↔ .fafb conversion
2//!
3//! Unified format: string table for section names, classification bits for
4//! DNA/Context/Pointer. Every top-level YAML key becomes a section.
5
6use std::io::Write;
7
8use super::chunk_registry::{
9    classify_key, default_priority_for_classification, ChunkClassification,
10};
11use super::error::{FafbError, FafbResult};
12use super::header::{FafbHeader, HEADER_SIZE, MAX_FILE_SIZE, MAX_SECTIONS};
13use super::priority::Priority;
14use super::section::{SectionEntry, SectionTable, SECTION_ENTRY_SIZE};
15use super::section_type::SectionType;
16use super::string_table::StringTable;
17
/// Settings that control how [`compile`] builds its output.
#[derive(Clone, Debug)]
pub struct CompileOptions {
    /// When `true` the header is created with a timestamp; turn it off to get
    /// byte-for-byte reproducible output (useful in tests).
    pub use_timestamp: bool,
}

impl Default for CompileOptions {
    /// Timestamps are enabled unless the caller opts out.
    fn default() -> Self {
        CompileOptions {
            use_timestamp: true,
        }
    }
}
32
33/// A decompiled .fafb file with header, section table, string table, and raw data
34#[derive(Debug, Clone)]
35pub struct DecompiledFafb {
36    /// The 32-byte header
37    pub header: FafbHeader,
38    /// Section table with all entries
39    pub section_table: SectionTable,
40    /// Raw file data (for extracting section content)
41    pub data: Vec<u8>,
42    /// String table — maps section_name_index to name strings
43    string_table: StringTable,
44}
45
46impl DecompiledFafb {
47    /// Extract the raw bytes for a section entry
48    pub fn section_data(&self, entry: &SectionEntry) -> Option<&[u8]> {
49        let start = entry.offset as usize;
50        let end = start + entry.length as usize;
51        if end <= self.data.len() {
52            Some(&self.data[start..end])
53        } else {
54            None
55        }
56    }
57
58    /// Extract section data as a UTF-8 string
59    pub fn section_string(&self, entry: &SectionEntry) -> Option<String> {
60        self.section_data(entry)
61            .and_then(|bytes| std::str::from_utf8(bytes).ok())
62            .map(|s| s.to_string())
63    }
64
65    /// Get the string table
66    pub fn string_table(&self) -> &StringTable {
67        &self.string_table
68    }
69
70    /// Get section name by entry — looks up in string table
71    pub fn section_name(&self, entry: &SectionEntry) -> String {
72        self.string_table
73            .get(entry.section_type.id())
74            .unwrap_or("UNKNOWN")
75            .to_string()
76    }
77
78    /// Get section data by name
79    pub fn get_section_by_name(&self, name: &str) -> Option<&[u8]> {
80        let idx = self.string_table.index_of(name)?;
81        self.section_table
82            .entries()
83            .iter()
84            .find(|e| e.section_type.id() == idx)
85            .and_then(|entry| self.section_data(entry))
86    }
87
88    /// Get section data by name as string
89    pub fn get_section_string_by_name(&self, name: &str) -> Option<String> {
90        self.get_section_by_name(name)
91            .and_then(|bytes| std::str::from_utf8(bytes).ok())
92            .map(|s| s.to_string())
93    }
94
95    /// Get all DNA sections
96    pub fn dna_sections(&self) -> Vec<&SectionEntry> {
97        self.section_table
98            .entries()
99            .iter()
100            .filter(|e| e.classification() == ChunkClassification::Dna)
101            .collect()
102    }
103
104    /// Get all Context sections
105    pub fn context_sections(&self) -> Vec<&SectionEntry> {
106        self.section_table
107            .entries()
108            .iter()
109            .filter(|e| e.classification() == ChunkClassification::Context)
110            .collect()
111    }
112
113    /// Get the Pointer section (typically "docs")
114    pub fn pointer_section(&self) -> Option<&SectionEntry> {
115        self.section_table
116            .entries()
117            .iter()
118            .find(|e| e.classification() == ChunkClassification::Pointer)
119    }
120}
121
122/// Compile a .faf YAML source string into .fafb binary bytes.
123///
124/// Every top-level YAML key becomes a section with a string table entry.
125/// Keys are classified as DNA/Context/Pointer automatically.
126///
127/// # Example
128///
129/// ```rust
130/// use faf_rust_sdk::binary::compile::{compile, CompileOptions};
131///
132/// let yaml = r#"
133/// faf_version: 2.5.0
134/// project:
135///   name: my-project
136///   goal: Build something great
137/// custom_data:
138///   key: value
139/// "#;
140///
141/// let opts = CompileOptions { use_timestamp: false };
142/// let fafb_bytes = compile(yaml, &opts).unwrap();
143/// assert_eq!(&fafb_bytes[0..4], b"FAFB");
144/// ```
145pub fn compile(yaml_source: &str, options: &CompileOptions) -> Result<Vec<u8>, String> {
146    let source_bytes = yaml_source.as_bytes();
147    if source_bytes.is_empty() {
148        return Err("Source content is empty".to_string());
149    }
150
151    let yaml: serde_yaml_ng::Value =
152        serde_yaml_ng::from_str(yaml_source).map_err(|e| format!("Invalid YAML: {}", e))?;
153
154    let mapping = yaml
155        .as_mapping()
156        .ok_or_else(|| "YAML root must be a mapping".to_string())?;
157
158    // Build string table and sections from all top-level keys
159    let mut string_table = StringTable::new();
160    let mut sections: Vec<(u8, ChunkClassification, Priority, Vec<u8>)> = Vec::new();
161
162    for (key, value) in mapping {
163        let key_str = key
164            .as_str()
165            .ok_or_else(|| "YAML key must be a string".to_string())?;
166
167        let name_idx = string_table
168            .add(key_str)
169            .map_err(|e| format!("String table error: {}", e))?;
170
171        let classification = classify_key(key_str);
172
173        let priority = if key_str == "faf_version" || key_str == "project" {
174            Priority::critical()
175        } else {
176            Priority::new(default_priority_for_classification(classification))
177        };
178
179        let content = serde_yaml_ng::to_string(value)
180            .map_err(|e| format!("Failed to serialize '{}': {}", key_str, e))?;
181        let data = format!("{}:\n{}", key_str, content).into_bytes();
182
183        sections.push((name_idx, classification, priority, data));
184    }
185
186    if sections.is_empty() {
187        return Err("No sections found in YAML".to_string());
188    }
189
190    if sections.len() > MAX_SECTIONS as usize {
191        return Err(format!(
192            "Too many sections: {} exceeds maximum {}",
193            sections.len(),
194            MAX_SECTIONS
195        ));
196    }
197
198    // Add __string_table__ name to string table before serializing
199    let st_name_idx = string_table
200        .add("__string_table__")
201        .map_err(|e| format!("String table error: {}", e))?;
202
203    let string_table_bytes = string_table
204        .to_bytes()
205        .map_err(|e| format!("String table serialization error: {}", e))?;
206
207    // Layout: [HEADER 32B] [section data...] [string table data] [section table entries...]
208    let mut data_offset: u32 = HEADER_SIZE as u32;
209    let mut section_data: Vec<u8> = Vec::new();
210    let mut section_table = SectionTable::new();
211
212    for (name_idx, classification, priority, data) in &sections {
213        let entry = SectionEntry::new(SectionType::from(*name_idx), data_offset, data.len() as u32)
214            .with_priority(*priority)
215            .with_classification(*classification);
216
217        section_table.push(entry);
218        section_data.extend_from_slice(data);
219        data_offset = data_offset
220            .checked_add(data.len() as u32)
221            .ok_or_else(|| "Section data exceeds u32::MAX bytes".to_string())?;
222    }
223
224    // String table section (last content section)
225    let st_section_index = section_table.len() as u16;
226    let st_entry = SectionEntry::new(
227        SectionType::from(st_name_idx),
228        data_offset,
229        string_table_bytes.len() as u32,
230    )
231    .with_priority(Priority::critical());
232
233    section_table.push(st_entry);
234    section_data.extend_from_slice(&string_table_bytes);
235    data_offset = data_offset
236        .checked_add(string_table_bytes.len() as u32)
237        .ok_or_else(|| "Section data exceeds u32::MAX bytes".to_string())?;
238
239    let section_count = section_table.len();
240    let section_table_size = section_count * SECTION_ENTRY_SIZE;
241    let section_table_offset = data_offset;
242    let total_size = section_table_offset
243        .checked_add(section_table_size as u32)
244        .ok_or_else(|| "Total file size exceeds u32::MAX bytes".to_string())?;
245
246    if total_size > MAX_FILE_SIZE {
247        return Err(format!(
248            "Output size {} bytes exceeds maximum {} bytes (10MB)",
249            total_size, MAX_FILE_SIZE
250        ));
251    }
252
253    // Build header
254    let mut header = if options.use_timestamp {
255        FafbHeader::with_timestamp()
256    } else {
257        FafbHeader::new()
258    };
259    header.set_source_checksum(source_bytes);
260    header.section_count = section_count as u16;
261    header.section_table_offset = section_table_offset;
262    header.total_size = total_size;
263    header.string_table_index = st_section_index;
264
265    // Assemble binary
266    let mut output: Vec<u8> = Vec::with_capacity(total_size as usize);
267    header.write(&mut output).map_err(|e| e.to_string())?;
268    output.write_all(&section_data).map_err(|e| e.to_string())?;
269    section_table
270        .write(&mut output)
271        .map_err(|e| e.to_string())?;
272
273    if output.len() != total_size as usize {
274        return Err(format!(
275            "Internal error: size mismatch (expected {} bytes, got {} bytes)",
276            total_size,
277            output.len()
278        ));
279    }
280
281    Ok(output)
282}
283
284/// Decompile .fafb binary bytes into a structured representation.
285///
286/// Parses header, section table, and string table.
287///
288/// # Example
289///
290/// ```rust
291/// use faf_rust_sdk::binary::compile::{compile, decompile, CompileOptions};
292///
293/// let yaml = "faf_version: 2.5.0\nproject:\n  name: test\n";
294/// let opts = CompileOptions { use_timestamp: false };
295/// let fafb_bytes = compile(yaml, &opts).unwrap();
296///
297/// let result = decompile(&fafb_bytes).unwrap();
298/// assert_eq!(result.header.version_major, 1);
299///
300/// let project = result.get_section_string_by_name("project").unwrap();
301/// assert!(project.contains("test"));
302/// ```
303pub fn decompile(fafb_bytes: &[u8]) -> FafbResult<DecompiledFafb> {
304    let header = FafbHeader::from_bytes(fafb_bytes)?;
305    header.validate(fafb_bytes)?;
306
307    // Read section table
308    let table_start = header.section_table_offset as usize;
309    let table_data = &fafb_bytes[table_start..];
310    let section_table = SectionTable::from_bytes(table_data, header.section_count as usize)?;
311    section_table.validate_bounds(header.total_size)?;
312
313    // Extract string table (required)
314    let st_index = header.string_table_index as usize;
315    if st_index >= section_table.len() {
316        return Err(FafbError::MissingStringTable);
317    }
318    let st_entry = section_table.get(st_index).unwrap();
319    let st_start = st_entry.offset as usize;
320    let st_end = st_start + st_entry.length as usize;
321    if st_end > fafb_bytes.len() {
322        return Err(FafbError::MissingStringTable);
323    }
324    let string_table = StringTable::from_bytes(&fafb_bytes[st_start..st_end])?;
325
326    Ok(DecompiledFafb {
327        header,
328        section_table,
329        data: fafb_bytes.to_vec(),
330        string_table,
331    })
332}
333
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest fixture: a version and a project name.
    const MINIMAL_YAML: &str = "faf_version: 2.5.0\nproject:\n  name: test-project\n";

    /// Fixture with a mix of known keys and two custom ones.
    const FULL_YAML: &str = r#"faf_version: 2.5.0
project:
  name: full-project
  goal: Test the compiler
tech_stack:
  languages:
    - Rust
    - TypeScript
commands:
  build: cargo build
  test: cargo test
architecture:
  style: microservices
context:
  notes: some context
docs:
  readme: README.md
custom_field:
  key: value
another_custom:
  deep:
    nested: data
"#;

    /// Options with the timestamp disabled so output is reproducible.
    fn opts() -> CompileOptions {
        CompileOptions {
            use_timestamp: false,
        }
    }

    /// Compiles `yaml` with [`opts`], panicking on failure.
    fn compile_ok(yaml: &str) -> Vec<u8> {
        compile(yaml, &opts()).unwrap()
    }

    /// Compiles `yaml` and decompiles the result, panicking on failure.
    fn roundtrip(yaml: &str) -> DecompiledFafb {
        decompile(&compile_ok(yaml)).unwrap()
    }

    /// Resolves the names of `entries` through `fafb`'s string table.
    fn names_of(fafb: &DecompiledFafb, entries: Vec<&SectionEntry>) -> Vec<String> {
        entries
            .into_iter()
            .map(|entry| fafb.section_name(entry))
            .collect()
    }

    // ─── Core compile/decompile ───

    #[test]
    fn test_compile_produces_valid_header() {
        let compiled = compile_ok(MINIMAL_YAML);
        assert_eq!(&compiled[0..4], b"FAFB");
        assert_eq!(compiled[4], 1); // version_major
        assert!(compiled.len() >= HEADER_SIZE);
    }

    #[test]
    fn test_compile_empty_fails() {
        let outcome = compile("", &opts());
        assert!(outcome.is_err());
    }

    #[test]
    fn test_compile_options_default() {
        assert!(CompileOptions::default().use_timestamp);
    }

    #[test]
    fn test_roundtrip_minimal() {
        let fafb = roundtrip(MINIMAL_YAML);

        assert_eq!(fafb.header.version_major, 1);
        assert!(fafb.header.flags.has_string_table());

        // faf_version + project + __string_table__
        assert!(fafb.section_table.len() >= 3);

        let project = fafb.get_section_string_by_name("project").unwrap();
        assert!(project.contains("test-project"));
    }

    #[test]
    fn test_roundtrip_full() {
        let fafb = roundtrip(FULL_YAML);
        let names = fafb.string_table();

        for &key in &[
            "faf_version",
            "project",
            "tech_stack",
            "commands",
            "docs",
            "custom_field",
            "another_custom",
        ] {
            assert!(names.index_of(key).is_some());
        }
    }

    #[test]
    fn test_decompile_invalid_magic() {
        let zeroed = [0u8; 32];
        assert!(decompile(&zeroed).is_err());
    }

    #[test]
    fn test_decompile_too_small() {
        let truncated = [0u8; 16];
        assert!(decompile(&truncated).is_err());
    }

    #[test]
    fn test_source_checksum() {
        let fafb = roundtrip(FULL_YAML);

        let expected = FafbHeader::compute_checksum(FULL_YAML.as_bytes());
        assert_eq!(fafb.header.source_checksum, expected);
    }

    #[test]
    fn test_deterministic_without_timestamp() {
        let first = compile_ok(MINIMAL_YAML);
        let second = compile_ok(MINIMAL_YAML);
        assert_eq!(first, second);
    }

    // ─── Section names ───

    #[test]
    fn test_section_names() {
        let fafb = roundtrip(FULL_YAML);

        for entry in fafb.section_table.entries() {
            assert!(!fafb.section_name(entry).is_empty());
        }
    }

    #[test]
    fn test_get_section_by_name() {
        let fafb = roundtrip(FULL_YAML);

        let project = fafb.get_section_string_by_name("project");
        assert!(project.is_some());
        assert!(project.unwrap().contains("full-project"));

        let docs = fafb.get_section_string_by_name("docs");
        assert!(docs.is_some());
        assert!(docs.unwrap().contains("README.md"));
    }

    // ─── Classification ───

    #[test]
    fn test_classification_dna() {
        let fafb = roundtrip(FULL_YAML);
        let dna_names = names_of(&fafb, fafb.dna_sections());

        for &expected in &[
            "faf_version",
            "project",
            "tech_stack",
            "commands",
            "architecture",
            "context",
        ] {
            assert!(dna_names.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_classification_context() {
        let fafb = roundtrip(FULL_YAML);
        let ctx_names = names_of(&fafb, fafb.context_sections());

        for &expected in &["custom_field", "another_custom"] {
            assert!(ctx_names.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_classification_pointer() {
        let fafb = roundtrip(FULL_YAML);

        let pointer = fafb.pointer_section();
        assert!(pointer.is_some());
        assert_eq!(fafb.section_name(pointer.unwrap()), "docs");
    }

    #[test]
    fn test_unknown_chunk_preserved() {
        let fafb = roundtrip(
            "faf_version: 2.5.0\nproject:\n  name: test\nmy_exotic_field:\n  data: preserved\n",
        );

        let exotic = fafb.get_section_string_by_name("my_exotic_field");
        assert!(exotic.is_some());
        assert!(exotic.unwrap().contains("preserved"));
    }

    // ─── String table ───

    #[test]
    fn test_string_table_flag_set() {
        let fafb = roundtrip(MINIMAL_YAML);
        assert!(fafb.header.flags.has_string_table());
    }

    #[test]
    fn test_string_table_index_valid() {
        let fafb = roundtrip(MINIMAL_YAML);

        let index = fafb.header.string_table_index as usize;
        assert!(index < fafb.section_table.len());
    }

    // ─── Priority ───

    #[test]
    fn test_priority_ordering() {
        let fafb = roundtrip(FULL_YAML);
        let names = fafb.string_table();

        for entry in fafb.section_table.entries() {
            let name = names
                .get(entry.section_type.id())
                .unwrap_or("__string_table__");
            if matches!(name, "faf_version" | "project") {
                assert!(
                    entry.priority.is_critical(),
                    "Expected '{}' to have critical priority",
                    name
                );
            }
        }
    }

    // ─── Known chunk types ───

    #[test]
    fn test_all_known_chunk_types() {
        let fafb = roundtrip(
            r#"faf_version: 2.5.0
project:
  name: all-types
instant_context:
  summary: test
tech_stack:
  - Rust
key_files:
  - main.rs
commands:
  build: make
architecture:
  style: monolith
context:
  note: x
bi_sync:
  enabled: true
meta:
  extra: data
docs:
  readme: README.md
"#,
        );
        let names = fafb.string_table();

        let known_keys = [
            "faf_version",
            "project",
            "instant_context",
            "tech_stack",
            "key_files",
            "commands",
            "architecture",
            "context",
            "bi_sync",
            "meta",
            "docs",
        ];
        for &key in &known_keys {
            assert!(
                names.index_of(key).is_some(),
                "Expected '{}' in string table",
                key
            );
        }
    }
}