fst_native/
reader.rs

1// Copyright 2023 The Regents of the University of California
2// released under BSD 3-Clause License
3// author: Kevin Laeufer <laeufer@berkeley.edu>
4
5use crate::io::*;
6use crate::types::*;
7use std::io::{BufRead, BufReader, Read, Seek, SeekFrom, Write};
8
9/// Reads in a FST file.
10pub struct FstReader<R: BufRead + Seek> {
11    input: InputVariant<R>,
12    meta: MetaData,
13}
14
/// The stream a reader actually works on after the optional gzip wrapper has
/// been dealt with.
enum InputVariant<R>
where
    R: BufRead + Seek,
{
    /// The input as provided by the caller; used when there is no gzip wrapper.
    Original(R),
    /// The content of a gzip wrapper, decompressed into a temporary file.
    Uncompressed(BufReader<std::fs::File>),
    /// The content of a gzip wrapper, decompressed into memory.
    UncompressedInMem(std::io::Cursor<Vec<u8>>),
}
20
21pub struct FstFilter {
22    pub start: u64,
23    pub end: Option<u64>,
24    pub include: Option<Vec<FstSignalHandle>>,
25}
26
27impl FstFilter {
28    pub fn all() -> Self {
29        FstFilter {
30            start: 0,
31            end: None,
32            include: None,
33        }
34    }
35
36    pub fn new(start: u64, end: u64, signals: Vec<FstSignalHandle>) -> Self {
37        FstFilter {
38            start,
39            end: Some(end),
40            include: Some(signals),
41        }
42    }
43
44    pub fn filter_time(start: u64, end: u64) -> Self {
45        FstFilter {
46            start,
47            end: Some(end),
48            include: None,
49        }
50    }
51
52    pub fn filter_signals(signals: Vec<FstSignalHandle>) -> Self {
53        FstFilter {
54            start: 0,
55            end: None,
56            include: Some(signals),
57        }
58    }
59}
60
/// Summary of the header information of a FST file.
///
/// All fields have total equality, so `Eq` is derived alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FstHeader {
    /// time of first sample
    pub start_time: u64,
    /// time of last sample
    pub end_time: u64,
    /// number of variables in the design
    pub var_count: u64,
    /// the highest signal handle; indicates the number of unique signals
    pub max_handle: u64,
    /// human readable version string
    pub version: String,
    /// human readable time stamp
    pub date: String,
    /// the exponent of the timescale; timescale will be 10^(exponent) seconds
    pub timescale_exponent: i8,
}
78
79impl<R: BufRead + Seek> FstReader<R> {
80    /// Reads in the FST file meta-data.
81    pub fn open(input: R) -> Result<Self> {
82        Self::open_internal(input, false)
83    }
84
85    pub fn open_and_read_time_table(input: R) -> Result<Self> {
86        Self::open_internal(input, true)
87    }
88
89    fn open_internal(mut input: R, read_time_table: bool) -> Result<Self> {
90        let uncompressed_input = uncompress_gzip_wrapper(&mut input)?;
91        match uncompressed_input {
92            UncompressGzipWrapper::None => {
93                let mut header_reader = HeaderReader::new(input);
94                header_reader.read(read_time_table)?;
95                let (input, meta) = header_reader.into_input_and_meta_data().unwrap();
96                Ok(FstReader {
97                    input: InputVariant::Original(input),
98                    meta,
99                })
100            }
101            UncompressGzipWrapper::TempFile(uc) => {
102                let mut header_reader = HeaderReader::new(uc);
103                header_reader.read(read_time_table)?;
104                let (uc2, meta) = header_reader.into_input_and_meta_data().unwrap();
105                Ok(FstReader {
106                    input: InputVariant::Uncompressed(uc2),
107                    meta,
108                })
109            }
110            UncompressGzipWrapper::InMemory(uc) => {
111                let mut header_reader = HeaderReader::new(uc);
112                header_reader.read(read_time_table)?;
113                let (uc2, meta) = header_reader.into_input_and_meta_data().unwrap();
114                Ok(FstReader {
115                    input: InputVariant::UncompressedInMem(uc2),
116                    meta,
117                })
118            }
119        }
120    }
121
122    pub fn get_header(&self) -> FstHeader {
123        FstHeader {
124            start_time: self.meta.header.start_time,
125            end_time: self.meta.header.end_time,
126            var_count: self.meta.header.var_count,
127            max_handle: self.meta.header.max_var_id_code,
128            version: self.meta.header.version.clone(),
129            date: self.meta.header.date.clone(),
130            timescale_exponent: self.meta.header.timescale_exponent,
131        }
132    }
133
134    pub fn get_time_table(&self) -> Option<&[u64]> {
135        match &self.meta.time_table {
136            Some(table) => Some(table),
137            None => None,
138        }
139    }
140
141    /// Reads the hierarchy and calls callback for every item.
142    pub fn read_hierarchy(&mut self, callback: impl FnMut(FstHierarchyEntry)) -> Result<()> {
143        match &mut self.input {
144            InputVariant::Original(input) => read_hierarchy(input, &self.meta, callback),
145            InputVariant::Uncompressed(input) => read_hierarchy(input, &self.meta, callback),
146            InputVariant::UncompressedInMem(input) => read_hierarchy(input, &self.meta, callback),
147        }
148    }
149
150    /// Read signal values for a specific time interval.
151    pub fn read_signals(
152        &mut self,
153        filter: &FstFilter,
154        callback: impl FnMut(u64, FstSignalHandle, FstSignalValue),
155    ) -> Result<()> {
156        // convert user filters
157        let signal_count = self.meta.signals.len();
158        let signal_mask = if let Some(signals) = &filter.include {
159            let mut signal_mask = BitMask::repeat(false, signal_count);
160            for sig in signals {
161                let signal_idx = sig.get_index();
162                signal_mask.set(signal_idx, true);
163            }
164            signal_mask
165        } else {
166            // include all
167            BitMask::repeat(true, signal_count)
168        };
169        let data_filter = DataFilter {
170            start: filter.start,
171            end: filter.end.unwrap_or(self.meta.header.end_time),
172            signals: signal_mask,
173        };
174
175        // build and run reader
176        match &mut self.input {
177            InputVariant::Original(input) => {
178                read_signals(input, &self.meta, &data_filter, callback)
179            }
180            InputVariant::Uncompressed(input) => {
181                read_signals(input, &self.meta, &data_filter, callback)
182            }
183            InputVariant::UncompressedInMem(input) => {
184                read_signals(input, &self.meta, &data_filter, callback)
185            }
186        }
187    }
188}
189
/// A signal value handed to the callback of `read_signals`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FstSignalValue<'a> {
    /// The value as a sequence of bytes. The slice is only valid for the
    /// duration of the callback invocation.
    String(&'a [u8]),
    /// The value of a real (floating point) signal.
    Real(f64),
}
194
195/// Quickly scans an input to see if it could be a FST file.
196pub fn is_fst_file(input: &mut (impl Read + Seek)) -> bool {
197    let is_fst = matches!(internal_check_fst_file(input), Ok(true));
198    // try to reset input
199    let _ = input.seek(SeekFrom::Start(0));
200    is_fst
201}
202
203/// Returns an error or false if not an fst. Returns Ok(true) only if we think it is an fst.
204fn internal_check_fst_file(input: &mut (impl Read + Seek)) -> Result<bool> {
205    // try to iterate over all blocks
206    loop {
207        let _block_tpe = match read_block_tpe(input) {
208            Err(ReaderError::Io(_)) => {
209                break;
210            }
211            Err(other) => return Err(other),
212            Ok(tpe) => tpe,
213        };
214        let section_length = read_u64(input)?;
215        input.seek(SeekFrom::Current((section_length as i64) - 8))?;
216    }
217    Ok(true)
218}
219
220fn read_hierarchy(
221    input: &mut (impl Read + Seek),
222    meta: &MetaData,
223    mut callback: impl FnMut(FstHierarchyEntry),
224) -> Result<()> {
225    input.seek(SeekFrom::Start(meta.hierarchy_offset))?;
226    let bytes = read_hierarchy_bytes(input, meta.hierarchy_compression)?;
227    let mut input = bytes.as_slice();
228    let mut handle_count = 0u32;
229    while let Some(entry) = read_hierarchy_entry(&mut input, &mut handle_count)? {
230        callback(entry);
231    }
232    Ok(())
233}
234
235fn read_signals(
236    input: &mut (impl Read + Seek),
237    meta: &MetaData,
238    filter: &DataFilter,
239    mut callback: impl FnMut(u64, FstSignalHandle, FstSignalValue),
240) -> Result<()> {
241    let mut reader = DataReader {
242        input,
243        meta,
244        filter,
245        callback: &mut callback,
246    };
247    reader.read()
248}
249
/// Result of checking an input for a gzip wrapper block.
enum UncompressGzipWrapper {
    /// There is no gzip wrapper; the original input should be used.
    None,
    /// The wrapped content was decompressed into a temporary file.
    TempFile(std::io::BufReader<std::fs::File>),
    /// The wrapped content was decompressed into memory.
    InMemory(std::io::Cursor<Vec<u8>>),
}
255
/// Whether temporary files may be used on the compilation target: `false` on
/// `wasm32`, `true` everywhere else.
const WE_HAVE_A_FILE_SYSTEM: bool = cfg!(not(target_arch = "wasm32"));
261
262/// Checks to see if the whole file is compressed in which case it is decompressed
263/// to a temp file which is returned.
264fn uncompress_gzip_wrapper(input: &mut (impl Read + Seek)) -> Result<UncompressGzipWrapper> {
265    let block_tpe = read_block_tpe(input)?;
266    if block_tpe != BlockType::GZipWrapper {
267        // no gzip wrapper
268        input.seek(SeekFrom::Start(0))?;
269        Ok(UncompressGzipWrapper::None)
270    } else {
271        // uncompress
272        let section_length = read_u64(input)?;
273        let uncompress_length = read_u64(input)? as usize;
274        if section_length == 0 {
275            return Err(ReaderError::NotFinishedCompressing());
276        }
277
278        // try to use a tempfile
279        if WE_HAVE_A_FILE_SYSTEM {
280            if let Ok(mut target) = tempfile::tempfile() {
281                decompress_gz_in_chunks(input, uncompress_length, &mut target)?;
282                // go to start of new file and return
283                target.seek(SeekFrom::Start(0))?;
284                let new_input = std::io::BufReader::new(target);
285                return Ok(UncompressGzipWrapper::TempFile(new_input));
286            }
287        }
288        // otherwise decompress into memory
289        let mut target = vec![];
290        decompress_gz_in_chunks(input, uncompress_length, &mut target)?;
291        let new_input = std::io::Cursor::new(target);
292        Ok(UncompressGzipWrapper::InMemory(new_input))
293    }
294}
295
296fn decompress_gz_in_chunks(
297    input: &mut (impl Read + Seek),
298    mut remaining: usize,
299    target: &mut impl Write,
300) -> Result<()> {
301    let mut decoder = flate2::read::GzDecoder::new(input);
302    let mut buf = vec![0u8; 32768]; // FST_GZIO_LEN
303    while remaining > 0 {
304        let read_len = std::cmp::min(buf.len(), remaining);
305        remaining -= read_len;
306        decoder.read_exact(&mut buf[..read_len])?;
307        target.write_all(&buf[..read_len])?;
308    }
309    Ok(())
310}
311
312#[derive(Debug)]
313struct MetaData {
314    header: Header,
315    signals: Vec<SignalInfo>,
316    #[allow(dead_code)]
317    blackouts: Vec<BlackoutData>,
318    data_sections: Vec<DataSectionInfo>,
319    float_endian: FloatingPointEndian,
320    hierarchy_compression: HierarchyCompression,
321    hierarchy_offset: u64,
322    time_table: Option<Vec<u64>>,
323}
324
325pub type Result<T> = std::result::Result<T, ReaderError>;
326
327struct HeaderReader<R: Read + Seek> {
328    input: R,
329    header: Option<Header>,
330    signals: Option<Vec<SignalInfo>>,
331    blackouts: Option<Vec<BlackoutData>>,
332    data_sections: Vec<DataSectionInfo>,
333    float_endian: FloatingPointEndian,
334    hierarchy: Option<(HierarchyCompression, u64)>,
335    time_table: Option<Vec<u64>>,
336}
337
338impl<R: Read + Seek> HeaderReader<R> {
339    fn new(input: R) -> Self {
340        HeaderReader {
341            input,
342            header: None,
343            signals: None,
344            blackouts: None,
345            data_sections: Vec::default(),
346            float_endian: FloatingPointEndian::Little,
347            hierarchy: None,
348            time_table: None,
349        }
350    }
351
352    fn read_data(&mut self, tpe: &BlockType) -> Result<()> {
353        let file_offset = self.input.stream_position()?;
354        // this is the data section
355        let section_length = read_u64(&mut self.input)?;
356        let start_time = read_u64(&mut self.input)?;
357        let end_time = read_u64(&mut self.input)?;
358        // optional: read the time table
359        if let Some(table) = &mut self.time_table {
360            let (_, mut time_chain) =
361                read_time_chain(&mut self.input, file_offset, section_length)?;
362            // in the first section, we might need to include the start time
363            let is_first_section = table.is_empty();
364            if is_first_section && time_chain[0] > start_time {
365                table.push(start_time);
366            }
367            table.append(&mut time_chain);
368            self.input.seek(SeekFrom::Start(file_offset + 3 * 8))?;
369        }
370        // go to the end of the section
371        self.skip(section_length, 3 * 8)?;
372        let kind = DataSectionKind::from_block_type(tpe).unwrap();
373        let info = DataSectionInfo {
374            file_offset,
375            start_time,
376            end_time,
377            kind,
378        };
379        self.data_sections.push(info);
380        Ok(())
381    }
382
383    fn skip(&mut self, section_length: u64, already_read: i64) -> Result<u64> {
384        Ok(self
385            .input
386            .seek(SeekFrom::Current((section_length as i64) - already_read))?)
387    }
388
389    fn read_hierarchy(&mut self, compression: HierarchyCompression) -> Result<()> {
390        let file_offset = self.input.stream_position()?;
391        // this is the data section
392        let section_length = read_u64(&mut self.input)?;
393        self.skip(section_length, 8)?;
394        assert!(
395            self.hierarchy.is_none(),
396            "Only a single hierarchy block is expected!"
397        );
398        self.hierarchy = Some((compression, file_offset));
399        Ok(())
400    }
401
402    fn read(&mut self, read_time_table: bool) -> Result<()> {
403        if read_time_table {
404            self.time_table = Some(Vec::new());
405        }
406        loop {
407            let block_tpe = match read_block_tpe(&mut self.input) {
408                Err(ReaderError::Io(_)) => {
409                    break;
410                }
411                Err(other) => return Err(other),
412                Ok(tpe) => tpe,
413            };
414
415            match block_tpe {
416                BlockType::Header => {
417                    let (header, endian) = read_header(&mut self.input)?;
418                    self.header = Some(header);
419                    self.float_endian = endian;
420                }
421                BlockType::VcData => self.read_data(&block_tpe)?,
422                BlockType::VcDataDynamicAlias => self.read_data(&block_tpe)?,
423                BlockType::VcDataDynamicAlias2 => self.read_data(&block_tpe)?,
424                BlockType::Blackout => {
425                    self.blackouts = Some(read_blackout(&mut self.input)?);
426                }
427                BlockType::Geometry => {
428                    self.signals = Some(read_geometry(&mut self.input)?);
429                }
430                BlockType::Hierarchy => self.read_hierarchy(HierarchyCompression::ZLib)?,
431                BlockType::HierarchyLZ4 => self.read_hierarchy(HierarchyCompression::Lz4)?,
432                BlockType::HierarchyLZ4Duo => self.read_hierarchy(HierarchyCompression::Lz4Duo)?,
433                BlockType::GZipWrapper => panic!("GZip Wrapper should have been handled earlier!"),
434                BlockType::Skip => {
435                    let section_length = read_u64(&mut self.input)?;
436                    self.skip(section_length, 8)?;
437                }
438            };
439        }
440        Ok(())
441    }
442
443    fn into_input_and_meta_data(mut self) -> Result<(R, MetaData)> {
444        self.input.seek(SeekFrom::Start(0))?;
445        let meta = MetaData {
446            header: self.header.unwrap(),
447            signals: self.signals.unwrap(),
448            blackouts: self.blackouts.unwrap_or_default(),
449            data_sections: self.data_sections,
450            float_endian: self.float_endian,
451            hierarchy_compression: self.hierarchy.unwrap().0,
452            hierarchy_offset: self.hierarchy.unwrap().1,
453            time_table: self.time_table,
454        };
455        Ok((self.input, meta))
456    }
457}
458
459struct DataReader<'a, R: Read + Seek, F: FnMut(u64, FstSignalHandle, FstSignalValue)> {
460    input: &'a mut R,
461    meta: &'a MetaData,
462    filter: &'a DataFilter,
463    callback: &'a mut F,
464}
465
466impl<'a, R: Read + Seek, F: FnMut(u64, FstSignalHandle, FstSignalValue)> DataReader<'a, R, F> {
467    fn read_value_changes(
468        &mut self,
469        section_kind: DataSectionKind,
470        section_start: u64,
471        section_length: u64,
472        time_section_length: u64,
473        time_chain: &[u64],
474    ) -> Result<()> {
475        let (max_handle, _) = read_variant_u64(&mut self.input)?;
476        let vc_start = self.input.stream_position()?;
477        let packtpe = ValueChangePackType::from_u8(read_u8(&mut self.input)?);
478
479        // the chain length is right in front of the time section
480        let chain_len_offset = section_start + section_length - time_section_length - 8;
481        let (chain_table, chain_table_lengths) = read_chain_table(
482            &mut self.input,
483            chain_len_offset,
484            section_kind,
485            max_handle,
486            vc_start,
487        )?;
488
489        // read data and create a bunch of pointers
490        let mut mu: Vec<u8> = Vec::new();
491        let mut head_pointer: Vec<u32> = Vec::with_capacity(max_handle as usize);
492        let mut length_remaining: Vec<u32> = Vec::with_capacity(max_handle as usize);
493        let mut scatter_pointer = vec![0u32; max_handle as usize];
494        let mut tc_head = vec![0u32; std::cmp::max(1, time_chain.len())];
495
496        for (signal_idx, (entry, length)) in chain_table
497            .iter()
498            .zip(chain_table_lengths.iter())
499            .take(max_handle as usize)
500            .enumerate()
501        {
502            // was there a signal change?
503            if *entry != 0 {
504                // is the signal supposed to be included?
505                if self.filter.signals.is_set(signal_idx) {
506                    // read all signal values
507                    self.input.seek(SeekFrom::Start(vc_start + entry))?;
508                    let mut bytes =
509                        read_packed_signal_value_bytes(&mut self.input, *length, packtpe)?;
510
511                    // read first time delta
512                    let len = self.meta.signals[signal_idx].len();
513                    let tdelta = if len == 1 {
514                        read_one_bit_signal_time_delta(&bytes, 0)?
515                    } else {
516                        read_multi_bit_signal_time_delta(&bytes, 0)?
517                    };
518
519                    // remember where we stored the signal data and how long it is
520                    head_pointer.push(mu.len() as u32);
521                    length_remaining.push(bytes.len() as u32);
522                    mu.append(&mut bytes);
523
524                    // remember at what time step we will read this signal
525                    scatter_pointer[signal_idx] = tc_head[tdelta];
526                    tc_head[tdelta] = signal_idx as u32 + 1; // index to handle
527                }
528            }
529            // if there was no real value added, we add dummy values to ensure that we can
530            // index the Vec with the signal ID
531            if head_pointer.len() == signal_idx {
532                head_pointer.push(1234);
533                length_remaining.push(1234);
534            }
535        }
536
537        for (time_id, time) in time_chain.iter().enumerate() {
538            // while we cannot ignore signal changes before the start of the window
539            // (since the signal might retain values for multiple cycles),
540            // signal changes after our window are completely useless
541            if *time > self.filter.end {
542                break;
543            }
544            // handles cannot be zero
545            while tc_head[time_id] != 0 {
546                let signal_id = (tc_head[time_id] - 1) as usize; // convert handle to index
547                let mut mu_slice = &mu.as_slice()[head_pointer[signal_id] as usize..];
548                let (vli, skiplen) = read_variant_u32(&mut mu_slice)?;
549                let signal_len = self.meta.signals[signal_id].len();
550                let signal_handle = FstSignalHandle::from_index(signal_id);
551                let len = match signal_len {
552                    1 => {
553                        let value = one_bit_signal_value_to_char(vli);
554                        let value_buf = [value];
555                        (self.callback)(*time, signal_handle, FstSignalValue::String(&value_buf));
556                        0 // no additional bytes consumed
557                    }
558                    0 => {
559                        let (len, skiplen2) = read_variant_u32(&mut mu_slice)?;
560                        let value = read_bytes(&mut mu_slice, len as usize)?;
561                        (self.callback)(*time, signal_handle, FstSignalValue::String(&value));
562                        len + skiplen2
563                    }
564                    len => {
565                        let signal_len = len as usize;
566                        if !self.meta.signals[signal_id].is_real() {
567                            let (value, len) = if (vli & 1) == 0 {
568                                // if bit0 is zero -> 2-state
569                                let read_len = signal_len.div_ceil(8);
570                                let bytes = read_bytes(&mut mu_slice, read_len)?;
571                                (
572                                    multi_bit_digital_signal_to_chars(&bytes, signal_len),
573                                    read_len as u32,
574                                )
575                            } else {
576                                (read_bytes(&mut mu_slice, signal_len)?, len)
577                            };
578                            (self.callback)(*time, signal_handle, FstSignalValue::String(&value));
579                            len
580                        } else {
581                            assert_eq!(vli & 1, 1, "TODO: implement support for rare packed case");
582                            let value = read_f64(&mut mu_slice, self.meta.float_endian)?;
583                            (self.callback)(*time, signal_handle, FstSignalValue::Real(value));
584                            8
585                        }
586                    }
587                };
588
589                // update pointers
590                let total_skiplen = skiplen + len;
591                // advance "slice" for signal values
592                head_pointer[signal_id] += total_skiplen;
593                length_remaining[signal_id] -= total_skiplen;
594                // find the next signal to read in this time step
595                tc_head[time_id] = scatter_pointer[signal_id];
596                // invalidate pointer
597                scatter_pointer[signal_id] = 0;
598
599                // is there more data for this signal in the current block?
600                if length_remaining[signal_id] > 0 {
601                    let tdelta = if signal_len == 1 {
602                        read_one_bit_signal_time_delta(&mu, head_pointer[signal_id])?
603                    } else {
604                        read_multi_bit_signal_time_delta(&mu, head_pointer[signal_id])?
605                    };
606
607                    // point to the next time step
608                    scatter_pointer[signal_id] = tc_head[time_id + tdelta];
609                    tc_head[time_id + tdelta] = (signal_id + 1) as u32; // store handle
610                }
611            }
612        }
613
614        Ok(())
615    }
616
617    fn read(&mut self) -> Result<()> {
618        let sections = self.meta.data_sections.clone();
619        // filter out any sections which are not in our time window
620        let relevant_sections = sections
621            .iter()
622            .filter(|s| self.filter.end >= s.start_time && s.end_time >= self.filter.start);
623        for (sec_num, section) in relevant_sections.enumerate() {
624            // skip to section
625            self.input.seek(SeekFrom::Start(section.file_offset))?;
626            let section_length = read_u64(&mut self.input)?;
627
628            // verify meta-data
629            let start_time = read_u64(&mut self.input)?;
630            let end_time = read_u64(&mut self.input)?;
631            assert_eq!(start_time, section.start_time);
632            assert_eq!(end_time, section.end_time);
633            let is_first_section = sec_num == 0;
634
635            // 66 is for the potential fastlz overhead
636            // let mem_required_for_traversal = read_u64(&mut self.input)? + 66;
637
638            let (time_section_length, time_chain) =
639                read_time_chain(&mut self.input, section.file_offset, section_length)?;
640
641            // only read frame if this is the first section and there is no other data for
642            // the start time
643            if is_first_section && time_chain[0] > start_time {
644                read_frame(
645                    &mut self.input,
646                    section.file_offset,
647                    section_length,
648                    &self.meta.signals,
649                    &self.filter.signals,
650                    self.meta.float_endian,
651                    start_time,
652                    self.callback,
653                )?;
654            } else {
655                skip_frame(&mut self.input, section.file_offset)?;
656            }
657
658            self.read_value_changes(
659                section.kind,
660                section.file_offset,
661                section_length,
662                time_section_length,
663                &time_chain,
664            )?;
665        }
666
667        Ok(())
668    }
669}