// altium_format/api/cfb.rs

//! Layer 1: CFB wrapper with Altium-specific convenience methods.
//!
//! Provides low-level access to OLE Compound Document files used by Altium,
//! with helpers for stream enumeration, block parsing, and reverse engineering.
5
6use std::collections::HashMap;
7use std::fs::File;
8use std::io::{self, Cursor, Read, Seek};
9use std::path::Path;
10
11use byteorder::{LittleEndian, ReadBytesExt};
12use cfb::CompoundFile;
13
14use crate::error::{AltiumError, Result};
15use crate::io::reader::{decompress_zlib, read_string_block};
16use crate::types::ParameterCollection;
17
/// The kind of Altium document a CFB container was recognised as.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AltiumFileType {
    /// Schematic symbol library (.SchLib)
    SchLib,
    /// Schematic document (.SchDoc)
    SchDoc,
    /// PCB footprint library (.PcbLib)
    PcbLib,
    /// PCB document (.PcbDoc)
    PcbDoc,
    /// Anything that did not match a known layout
    Unknown,
}

impl AltiumFileType {
    /// Human-readable name of the file type; identical to the variant name.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::SchLib => "SchLib",
            Self::SchDoc => "SchDoc",
            Self::PcbLib => "PcbLib",
            Self::PcbDoc => "PcbDoc",
            Self::Unknown => "Unknown",
        }
    }

    /// `true` for the two library kinds (`SchLib`, `PcbLib`).
    pub fn is_library(&self) -> bool {
        match *self {
            Self::SchLib | Self::PcbLib => true,
            Self::SchDoc | Self::PcbDoc | Self::Unknown => false,
        }
    }

    /// `true` for the two schematic kinds (`SchLib`, `SchDoc`).
    pub fn is_schematic(&self) -> bool {
        match *self {
            Self::SchLib | Self::SchDoc => true,
            Self::PcbLib | Self::PcbDoc | Self::Unknown => false,
        }
    }

    /// `true` for the two PCB kinds (`PcbLib`, `PcbDoc`).
    pub fn is_pcb(&self) -> bool {
        match *self {
            Self::PcbLib | Self::PcbDoc => true,
            Self::SchLib | Self::SchDoc | Self::Unknown => false,
        }
    }
}
60
/// Describes one stream (file-like entry) found inside the CFB container.
#[derive(Debug, Clone)]
pub struct StreamInfo {
    /// Absolute path of the stream inside the container (e.g., "/FileHeader")
    pub path: String,
    /// Length of the stream contents, in bytes
    pub size: u64,
}
69
/// Describes one storage (directory-like entry) found inside the CFB container.
#[derive(Debug, Clone)]
pub struct StorageInfo {
    /// Absolute path of the storage inside the container (e.g., "/Resistor")
    pub path: String,
    /// How many direct children (streams and storages together) it holds
    pub child_count: usize,
}
78
79/// A raw block from a size-prefixed stream.
80#[derive(Debug, Clone)]
81pub struct Block {
82    /// Offset in the stream where this block starts
83    pub offset: usize,
84    /// Size of the data (excluding header)
85    pub size: usize,
86    /// Flags from the high byte of the size field
87    pub flags: u8,
88    /// Raw block data
89    pub data: Vec<u8>,
90}
91
92impl Block {
93    /// Returns true if the binary flag (0x01) is set.
94    ///
95    /// In schematic files, this indicates a binary record rather than
96    /// a pipe-delimited parameter string.
97    pub fn is_binary(&self) -> bool {
98        (self.flags & 0x01) != 0
99    }
100
101    /// Try to parse the block as a ParameterCollection.
102    ///
103    /// Returns None if the block appears to be binary or parsing fails.
104    pub fn as_params(&self) -> Option<ParameterCollection> {
105        if self.is_binary() || self.data.is_empty() {
106            return None;
107        }
108
109        // Try to interpret as null-terminated string
110        let end = self
111            .data
112            .iter()
113            .position(|&b| b == 0)
114            .unwrap_or(self.data.len());
115        let text = String::from_utf8_lossy(&self.data[..end]);
116
117        // Quick check: parameters should start with separator
118        if !text.starts_with('|') && !text.starts_with('`') {
119            return None;
120        }
121
122        Some(ParameterCollection::from_string(&text))
123    }
124
125    /// Returns the raw data bytes.
126    pub fn data(&self) -> &[u8] {
127        &self.data
128    }
129}
130
131/// Wrapper around CFB providing Altium-specific convenience methods.
132///
133/// This is Layer 1 of the API, providing low-level access to the CFB container
134/// with helpers for reverse engineering and exploration.
135pub struct AltiumCfb<R: Read + Seek> {
136    cf: CompoundFile<R>,
137    file_type: AltiumFileType,
138    section_keys: Option<HashMap<String, String>>,
139}
140
141impl AltiumCfb<File> {
142    /// Opens a CFB file by path.
143    pub fn open_file<P: AsRef<Path>>(path: P) -> Result<Self> {
144        let file = File::open(path.as_ref()).map_err(AltiumError::Io)?;
145        Self::open(file)
146    }
147}
148
149impl<R: Read + Seek> AltiumCfb<R> {
150    /// Opens a CFB from a reader.
151    pub fn open(reader: R) -> Result<Self> {
152        let cf = CompoundFile::open(reader).map_err(|e| {
153            AltiumError::Io(io::Error::new(io::ErrorKind::InvalidData, e.to_string()))
154        })?;
155
156        let mut wrapper = AltiumCfb {
157            cf,
158            file_type: AltiumFileType::Unknown,
159            section_keys: None,
160        };
161
162        wrapper.file_type = wrapper.detect_file_type();
163        Ok(wrapper)
164    }
165
166    /// Returns the detected file type.
167    pub fn file_type(&self) -> AltiumFileType {
168        self.file_type
169    }
170
171    /// Detects the file type by examining stream structure.
172    fn detect_file_type(&mut self) -> AltiumFileType {
173        // Check for PCB-specific streams first
174        if self.exists("/Board6/Data") {
175            return AltiumFileType::PcbDoc;
176        }
177        if self.exists("/Library/Data") {
178            return AltiumFileType::PcbLib;
179        }
180
181        // Check for schematic streams
182        if self.exists("/FileHeader") {
183            // SchLib has /SectionKeys or nested storages
184            if self.exists("/SectionKeys") {
185                return AltiumFileType::SchLib;
186            }
187
188            // Check for component storages (SchLib pattern)
189            for entry in self.cf.walk() {
190                if entry.is_storage() && entry.path().components().count() == 2 {
191                    // Has a nested storage at depth 1 - likely SchLib
192                    return AltiumFileType::SchLib;
193                }
194            }
195
196            return AltiumFileType::SchDoc;
197        }
198
199        AltiumFileType::Unknown
200    }
201
202    // --- Navigation ---
203
204    /// Returns true if the given path exists in the CFB.
205    pub fn exists(&mut self, path: &str) -> bool {
206        self.cf.exists(path)
207    }
208
209    /// Lists all streams in the CFB container.
210    pub fn streams(&mut self) -> Result<Vec<StreamInfo>> {
211        let mut streams = Vec::new();
212
213        for entry in self.cf.walk() {
214            if entry.is_stream() {
215                streams.push(StreamInfo {
216                    path: entry.path().to_string_lossy().to_string(),
217                    size: entry.len(),
218                });
219            }
220        }
221
222        // Sort by path for consistent ordering
223        streams.sort_by(|a, b| a.path.cmp(&b.path));
224        Ok(streams)
225    }
226
227    /// Lists all storages (directories) in the CFB container.
228    pub fn storages(&mut self) -> Result<Vec<StorageInfo>> {
229        let mut storages = Vec::new();
230        let mut child_counts: HashMap<String, usize> = HashMap::new();
231
232        // Count children for each storage
233        for entry in self.cf.walk() {
234            if let Some(parent) = entry.path().parent() {
235                let parent_path = parent.to_string_lossy().to_string();
236                *child_counts.entry(parent_path).or_insert(0) += 1;
237            }
238        }
239
240        for entry in self.cf.walk() {
241            if entry.is_storage() {
242                let path = entry.path().to_string_lossy().to_string();
243                let child_count = child_counts.get(&path).copied().unwrap_or(0);
244                storages.push(StorageInfo { path, child_count });
245            }
246        }
247
248        storages.sort_by(|a, b| a.path.cmp(&b.path));
249        Ok(storages)
250    }
251
252    // --- Section Keys ---
253
254    /// Loads and caches section keys mapping (LIBREF → storage path).
255    ///
256    /// Section keys map full component names to truncated storage paths,
257    /// which is necessary because CFB has a 31-character limit on entry names.
258    pub fn section_keys(&mut self) -> Result<&HashMap<String, String>> {
259        if self.section_keys.is_none() {
260            self.section_keys = Some(self.load_section_keys()?);
261        }
262        Ok(self.section_keys.as_ref().unwrap())
263    }
264
265    /// Loads section keys from /SectionKeys stream.
266    fn load_section_keys(&mut self) -> Result<HashMap<String, String>> {
267        let mut keys = HashMap::new();
268
269        if !self.exists("/SectionKeys") {
270            return Ok(keys);
271        }
272
273        let data = self.read_stream("/SectionKeys")?;
274        let mut cursor = Cursor::new(&data);
275
276        // Read key count
277        let count = cursor.read_i32::<LittleEndian>().unwrap_or(0);
278
279        for _ in 0..count {
280            // Read LIBREF (full name)
281            let lib_ref = match read_string_block(&mut cursor) {
282                Ok(s) => s,
283                Err(_) => break,
284            };
285
286            // Read section key (storage path)
287            let section_key = match read_string_block(&mut cursor) {
288                Ok(s) => s,
289                Err(_) => break,
290            };
291
292            keys.insert(lib_ref, section_key);
293        }
294
295        Ok(keys)
296    }
297
298    /// Resolves a component/footprint name to its storage path.
299    ///
300    /// For names ≤31 chars without '/', returns the name directly.
301    /// For longer names or names with '/', looks up in section keys.
302    pub fn resolve_section(&mut self, lib_ref: &str) -> Result<String> {
303        // Check if name needs mapping
304        if lib_ref.len() <= 31 && !lib_ref.contains('/') {
305            return Ok(lib_ref.to_string());
306        }
307
308        let keys = self.section_keys()?;
309        keys.get(lib_ref)
310            .cloned()
311            .ok_or_else(|| AltiumError::MissingData(format!("Section key not found: {}", lib_ref)))
312    }
313
314    /// Lists all component/footprint names in a library file.
315    pub fn list_components(&mut self) -> Result<Vec<String>> {
316        match self.file_type {
317            AltiumFileType::SchLib => self.list_schlib_components(),
318            AltiumFileType::PcbLib => self.list_pcblib_components(),
319            _ => Ok(Vec::new()),
320        }
321    }
322
323    fn list_schlib_components(&mut self) -> Result<Vec<String>> {
324        if !self.exists("/FileHeader") {
325            return Ok(Vec::new());
326        }
327
328        let data = self.read_stream("/FileHeader")?;
329        let mut cursor = Cursor::new(&data);
330
331        // Read header parameters block
332        let params = ParameterCollection::read_from(&mut cursor)?;
333        let count = params
334            .get("COMPCOUNT")
335            .or_else(|| params.get("WEIGHT"))
336            .map(|v| v.as_int_or(0))
337            .unwrap_or(0);
338
339        let mut components = Vec::with_capacity(count as usize);
340        for _ in 0..count {
341            if let Ok(name) = read_string_block(&mut cursor) {
342                components.push(name);
343            }
344        }
345
346        Ok(components)
347    }
348
349    fn list_pcblib_components(&mut self) -> Result<Vec<String>> {
350        if !self.exists("/Library/Data") {
351            return Ok(Vec::new());
352        }
353
354        let data = self.read_stream("/Library/Data")?;
355        let mut cursor = Cursor::new(&data);
356
357        // Read header block
358        let _header = ParameterCollection::read_from(&mut cursor)?;
359
360        // Read component count
361        let count = cursor.read_i32::<LittleEndian>().unwrap_or(0);
362
363        let mut components = Vec::with_capacity(count as usize);
364        for _ in 0..count {
365            if let Ok(name) = read_string_block(&mut cursor) {
366                components.push(name);
367            }
368        }
369
370        Ok(components)
371    }
372
373    // --- Stream Reading ---
374
375    /// Reads raw bytes from a stream.
376    pub fn read_stream(&mut self, path: &str) -> Result<Vec<u8>> {
377        let mut stream = self
378            .cf
379            .open_stream(path)
380            .map_err(|e| AltiumError::Io(io::Error::new(io::ErrorKind::NotFound, e.to_string())))?;
381
382        let mut data = Vec::new();
383        stream.read_to_end(&mut data).map_err(AltiumError::Io)?;
384        Ok(data)
385    }
386
387    /// Reads a stream and parses it as a single ParameterCollection.
388    pub fn read_params(&mut self, path: &str) -> Result<ParameterCollection> {
389        let data = self.read_stream(path)?;
390        let mut cursor = Cursor::new(&data);
391        ParameterCollection::read_from(&mut cursor)
392    }
393
394    /// Reads a stream as size-prefixed blocks.
395    pub fn read_blocks(&mut self, path: &str) -> Result<Vec<Block>> {
396        let data = self.read_stream(path)?;
397        Self::parse_blocks(&data)
398    }
399
400    /// Parses raw data as size-prefixed blocks.
401    pub fn parse_blocks(data: &[u8]) -> Result<Vec<Block>> {
402        let mut blocks = Vec::new();
403        let mut cursor = Cursor::new(data);
404
405        while (cursor.position() as usize) < data.len() {
406            let offset = cursor.position() as usize;
407
408            let size_raw = match cursor.read_i32::<LittleEndian>() {
409                Ok(s) => s,
410                Err(_) => break,
411            };
412
413            let flags = ((size_raw as u32) >> 24) as u8;
414            let size = (size_raw & 0x00FFFFFF) as usize;
415
416            if size == 0 || offset + 4 + size > data.len() {
417                break;
418            }
419
420            let mut block_data = vec![0u8; size];
421            if cursor.read_exact(&mut block_data).is_err() {
422                break;
423            }
424
425            blocks.push(Block {
426                offset,
427                size,
428                flags,
429                data: block_data,
430            });
431        }
432
433        Ok(blocks)
434    }
435
436    // --- Decompression ---
437
438    /// Decompresses a zlib-compressed stream.
439    pub fn decompress(&mut self, path: &str) -> Result<Vec<u8>> {
440        let data = self.read_stream(path)?;
441        decompress_zlib(&data)
442    }
443
444    /// Decompresses data starting at a given offset.
445    pub fn decompress_at(&mut self, path: &str, offset: usize) -> Result<Vec<u8>> {
446        let data = self.read_stream(path)?;
447        if offset >= data.len() {
448            return Err(AltiumError::Parse("Offset beyond stream length".into()));
449        }
450        decompress_zlib(&data[offset..])
451    }
452
453    // --- Debugging/Reverse Engineering ---
454
455    /// Generates a hexdump of stream content.
456    pub fn hexdump(
457        &mut self,
458        path: &str,
459        offset: usize,
460        length: usize,
461        width: usize,
462    ) -> Result<String> {
463        let data = self.read_stream(path)?;
464        let end = if length == 0 {
465            data.len()
466        } else {
467            (offset + length).min(data.len())
468        };
469
470        if offset >= data.len() {
471            return Ok(String::new());
472        }
473
474        let slice = &data[offset..end];
475        Ok(format_hexdump(slice, offset, width))
476    }
477
478    /// Finds printable strings in a stream.
479    pub fn find_strings(&mut self, path: &str, min_length: usize) -> Result<Vec<FoundString>> {
480        let data = self.read_stream(path)?;
481        Ok(extract_strings(&data, min_length))
482    }
483
484    /// Searches for a pattern across all streams.
485    pub fn search(&mut self, pattern: &str, ignore_case: bool) -> Result<Vec<SearchMatch>> {
486        let mut matches = Vec::new();
487        let pattern_bytes = if ignore_case {
488            pattern.to_lowercase().into_bytes()
489        } else {
490            pattern.as_bytes().to_vec()
491        };
492
493        for stream in self.streams()? {
494            let data = self.read_stream(&stream.path)?;
495            let search_data = if ignore_case {
496                data.iter()
497                    .map(|b| b.to_ascii_lowercase())
498                    .collect::<Vec<_>>()
499            } else {
500                data.clone()
501            };
502
503            for (i, window) in search_data.windows(pattern_bytes.len()).enumerate() {
504                if window == pattern_bytes.as_slice() {
505                    matches.push(SearchMatch {
506                        stream: stream.path.clone(),
507                        offset: i,
508                        context: extract_context(&data, i, 32),
509                    });
510                }
511            }
512        }
513
514        Ok(matches)
515    }
516
517    /// Returns mutable access to the underlying CompoundFile.
518    ///
519    /// Use this for advanced operations not covered by the wrapper.
520    pub fn inner(&mut self) -> &mut CompoundFile<R> {
521        &mut self.cf
522    }
523
524    /// Returns immutable access to the underlying CompoundFile.
525    pub fn inner_ref(&self) -> &CompoundFile<R> {
526        &self.cf
527    }
528
529    /// Returns the CFB format version.
530    pub fn version(&self) -> cfb::Version {
531        self.cf.version()
532    }
533
534    /// Checks if a path exists and returns whether it's a stream.
535    ///
536    /// Returns `Some(true)` if the path is a stream, `Some(false)` if it's a storage,
537    /// `None` if the path doesn't exist.
538    pub fn entry_type(&mut self, path: &str) -> Option<bool> {
539        match self.cf.entry(path) {
540            Ok(entry) => Some(entry.is_stream()),
541            Err(_) => None,
542        }
543    }
544
545    /// Returns the size of a stream, or None if the path doesn't exist or isn't a stream.
546    pub fn stream_size(&mut self, path: &str) -> Option<u64> {
547        match self.cf.entry(path) {
548            Ok(entry) if entry.is_stream() => Some(entry.len()),
549            _ => None,
550        }
551    }
552
553    /// Returns an iterator over all entries in the CFB, yielding (path, is_stream, size).
554    pub fn entries(&mut self) -> Vec<(String, bool, u64)> {
555        self.cf
556            .walk()
557            .map(|e| {
558                (
559                    e.path().to_string_lossy().to_string(),
560                    e.is_stream(),
561                    e.len(),
562                )
563            })
564            .collect()
565    }
566}
567
568/// A string found during string extraction.
569#[derive(Debug, Clone)]
570pub struct FoundString {
571    /// Offset in the stream
572    pub offset: usize,
573    /// The string content
574    pub content: String,
575    /// Detected encoding
576    pub encoding: StringEncoding,
577}
578
/// Text encoding attributed to a found string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringEncoding {
    /// Plain ASCII text
    Ascii,
    /// UTF-16, little-endian
    Utf16Le,
    /// Windows-1252 code page
    Windows1252,
}
586
/// One hit produced by a pattern search across streams.
#[derive(Debug, Clone)]
pub struct SearchMatch {
    /// Path of the stream containing the hit
    pub stream: String,
    /// Byte offset of the hit within that stream
    pub offset: usize,
    /// Printable rendering of the bytes surrounding the hit
    pub context: String,
}
597
598// --- Helper Functions ---
599
/// Formats `data` as a classic hexdump, one line per `width` bytes.
///
/// Each line has three parts: the offset of its first byte as eight hex
/// digits (`base_offset` is added so the numbers refer to the original
/// stream), the bytes in hex with an extra space at the half-way column, and
/// the same bytes as text between `|` bars, with non-printable bytes shown
/// as `.`. Short final lines are padded so the text column stays aligned.
///
/// `width` is clamped to `8..=32`. Empty input yields an empty string.
fn format_hexdump(data: &[u8], base_offset: usize, width: usize) -> String {
    use std::fmt::Write as _;

    let width = width.clamp(8, 32);
    let gap_column = width / 2;

    // Size the buffer once: offset field, hex cells, gap, text column, bars.
    let rows = (data.len() + width - 1) / width;
    let mut result = String::with_capacity(rows * (4 * width + 16));

    for (row, chunk) in data.chunks(width).enumerate() {
        // Writing into a `String` cannot fail, so the results are ignored.
        let _ = write!(result, "{:08x}  ", base_offset + row * width);

        // Hex cells, including blank cells that pad an incomplete last row.
        for column in 0..width {
            if column == gap_column {
                result.push(' ');
            }
            match chunk.get(column) {
                Some(byte) => {
                    let _ = write!(result, "{:02x} ", byte);
                }
                None => result.push_str("   "),
            }
        }

        // ASCII representation
        result.push_str(" |");
        result.extend(chunk.iter().map(|&byte| {
            if byte.is_ascii_graphic() || byte == b' ' {
                byte as char
            } else {
                '.'
            }
        }));
        result.push_str("|\n");
    }

    result
}
641
642fn extract_strings(data: &[u8], min_length: usize) -> Vec<FoundString> {
643    let mut strings = Vec::new();
644    let mut current = Vec::new();
645    let mut start = 0;
646
647    for (i, &byte) in data.iter().enumerate() {
648        if byte.is_ascii_graphic() || byte == b' ' {
649            if current.is_empty() {
650                start = i;
651            }
652            current.push(byte);
653        } else if !current.is_empty() {
654            if current.len() >= min_length {
655                strings.push(FoundString {
656                    offset: start,
657                    content: String::from_utf8_lossy(&current).to_string(),
658                    encoding: StringEncoding::Ascii,
659                });
660            }
661            current.clear();
662        }
663    }
664
665    // Handle trailing string
666    if current.len() >= min_length {
667        strings.push(FoundString {
668            offset: start,
669            content: String::from_utf8_lossy(&current).to_string(),
670            encoding: StringEncoding::Ascii,
671        });
672    }
673
674    strings
675}
676
/// Renders the bytes around `offset` as printable text.
///
/// The window covers `context_len / 2` bytes before `offset` and the same
/// number from `offset` onward, clipped to the bounds of `data`. Printable
/// ASCII (graphic characters and spaces) is kept; every other byte becomes
/// `.`. An `offset` past the end of `data` yields whatever part of the
/// window still overlaps the data, possibly nothing.
fn extract_context(data: &[u8], offset: usize, context_len: usize) -> String {
    let half = context_len / 2;

    // Saturating arithmetic plus clamping keeps `start <= end <= data.len()`
    // for any offset, so the slice below cannot panic.
    let end = offset.saturating_add(half).min(data.len());
    let start = offset.saturating_sub(half).min(end);

    data[start..end]
        .iter()
        .map(|&byte| {
            if byte.is_ascii_graphic() || byte == b' ' {
                byte as char
            } else {
                '.'
            }
        })
        .collect()
}
694
695use crate::traits::FromBinary;
696
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a block at offset 0 with the given flags and payload.
    fn block_with(flags: u8, payload: &[u8]) -> Block {
        Block {
            offset: 0,
            size: payload.len(),
            flags,
            data: payload.to_vec(),
        }
    }

    /// Bit 0 of the flags decides whether a block counts as binary.
    #[test]
    fn test_block_flags() {
        assert!(block_with(0x01, &[]).is_binary());
        assert!(!block_with(0x00, &[]).is_binary());
    }

    /// A NUL-terminated, pipe-delimited payload parses into parameters.
    #[test]
    fn test_block_as_params() {
        let block = block_with(0x00, b"|RECORD=1|NAME=Test|\0");

        let params = block.as_params().expect("Should parse as params");
        assert_eq!(params.get("RECORD").unwrap().as_int_or(0), 1);
        assert_eq!(params.get("NAME").unwrap().as_str(), "Test");
    }

    /// The dump contains both the hex bytes and the text column.
    #[test]
    fn test_hexdump_format() {
        let dump = format_hexdump(b"Hello, World!", 0, 16);
        assert!(dump.contains("48 65 6c 6c")); // "Hell"
        assert!(dump.contains("|Hello, World!|"));
    }

    /// NUL bytes separate the printable runs that get reported.
    #[test]
    fn test_extract_strings() {
        let found = extract_strings(b"\x00\x00Hello\x00World\x00\x00", 4);
        let contents: Vec<&str> = found.iter().map(|s| s.content.as_str()).collect();
        assert_eq!(contents.len(), 2);
        assert_eq!(contents[0], "Hello");
        assert_eq!(contents[1], "World");
    }
}
751}