dbase/
reading.rs

1//! Module with the definition of fn's and struct's to read .dbf files
2
3use std::convert::TryFrom;
4use std::fs::File;
5use std::io::{BufReader, Read, Seek, SeekFrom};
6use std::iter::FusedIterator;
7use std::path::Path;
8
9use crate::encoding::DynEncoding;
10use crate::error::{Error, ErrorKind, FieldIOError};
11use crate::field::types::{FieldType, FieldValue, TrimOption};
12use crate::field::{DeletionFlag, FieldInfo};
13use crate::header::Header;
14use crate::memo::{MemoFileType, MemoReader};
15use crate::{Encoding, FieldConversionError, Record};
16
/// Value of the byte between the last RecordFieldInfo and the first record
pub(crate) const TERMINATOR_VALUE: u8 = 0x0D;

// NOTE(review): presumably the size in bytes of the "backlink" area that some
// dBase flavours store after the field descriptors; it is not used in this
// module, so confirm against the header parsing code.
pub(crate) const BACKLINK_SIZE: u16 = 263;
21
/// Trait to be implemented by structs that represent records read from a
/// dBase file.
///
/// The field iterator gives access to methods that allow reading field values
/// or skipping them.
/// It is not required that the user reads / skips all the fields in a record,
/// in other words: it is not required to consume the iterator.
pub trait ReadableRecord: Sized {
    /// Function to be implemented that returns a new instance of your type
    /// using values read from the [`FieldIterator`].
    ///
    /// # Errors
    ///
    /// Implementations report failures as a [`FieldIOError`].
    fn read_using<Source, MemoSource>(
        field_iterator: &mut FieldIterator<Source, MemoSource>,
    ) -> Result<Self, FieldIOError>
    where
        Source: Read + Seek,
        MemoSource: Read + Seek;
}
39
/// Struct containing the information needed to
/// create a new TableWriter which would write a file
/// with the same record structure as another dbase file.
///
/// You can get this by using [Reader::into_table_info].
#[derive(Clone)]
pub struct TableInfo {
    /// Header of the file the info was taken from
    pub(crate) header: Header,
    /// Description of the fields that make up a record
    pub(crate) fields_info: Vec<FieldInfo>,
    /// Encoding the source reader was using
    pub(crate) encoding: DynEncoding,
}
51
/// Options related to reading
///
/// Build one with [`ReadingOptions::default`] and customize it with the
/// builder-style methods.
#[derive(Copy, Clone, Debug)]
pub struct ReadingOptions {
    /// How character field values are trimmed when read
    pub(crate) character_trim: TrimOption,
}
57
58impl Default for ReadingOptions {
59    fn default() -> Self {
60        Self {
61            character_trim: TrimOption::BeginEnd,
62        }
63    }
64}
65
66impl ReadingOptions {
67    /// Customize how spaces ` ` are trimmed within [FieldValue::Character]
68    ///
69    /// By default they are trimmed at the begining and the end
70    pub fn character_trim(mut self, trim_option: TrimOption) -> Self {
71        self.character_trim = trim_option;
72        self
73    }
74}
75
/// Convenience builder to create a reader directly from file sources
///
/// Only the dbf source is mandatory; the memo source, the encoding and the
/// reading options are optional and can be set with the `with_*` methods.
///
/// # Example
///
/// ```
/// use std::fs::File;
///
/// # fn main() -> Result<(), dbase::Error> {
/// let dbf_file = File::open("tests/data/line.dbf").unwrap();
/// let options = dbase::ReadingOptions::default()
///     .character_trim(dbase::TrimOption::BeginEnd);
///
/// let mut reader = dbase::ReaderBuilder::new(dbf_file)
///     .with_options(options)
///     .with_encoding(dbase::encoding::UnicodeLossy)
///     .build()
///     .unwrap();
///
/// let records = reader.read()?;
/// assert_eq!(records.len(), 1);
/// # Ok(())
/// # }
/// ```
pub struct ReaderBuilder<T: Read + Seek, E: Encoding + 'static> {
    /// Source of the dbf data
    source: T,
    /// Optional source of the memo data
    memo_source: Option<T>,
    /// Optional encoding overriding the one detected when opening the file
    encoding: Option<E>,
    /// Optional reading options, the default ones are used when `None`
    options: Option<ReadingOptions>,
}
105
106impl<T: Read + Seek, E: Encoding + 'static> ReaderBuilder<T, E> {
107    pub fn new(source: T) -> Self {
108        Self {
109            source,
110            memo_source: None,
111            encoding: None,
112            options: None,
113        }
114    }
115
116    pub fn with_memo(mut self, memo_source: T) -> Self {
117        self.memo_source = Some(memo_source);
118
119        self
120    }
121
122    pub fn with_encoding(mut self, encoding: E) -> Self {
123        self.encoding = Some(encoding);
124
125        self
126    }
127
128    pub fn with_options(mut self, options: ReadingOptions) -> Self {
129        self.options = Some(options);
130
131        self
132    }
133
134    pub fn build(self) -> Result<Reader<T>, Error> {
135        let file = crate::File::open(self.source)?;
136
137        let memo_reader = if let Some(memo_source) = self.memo_source {
138            let memo_type = file.header.file_type.supported_memo_type();
139            if let Some(mt) = memo_type {
140                let memo_reader =
141                    MemoReader::new(mt, memo_source).map_err(|error| Error::io_error(error, 0))?;
142
143                Some(memo_reader)
144            } else {
145                None
146            }
147        } else {
148            None
149        };
150
151        Ok(Reader {
152            source: file.inner,
153            memo_reader,
154            header: file.header,
155            fields_info: file.fields_info.inner,
156            encoding: self
157                .encoding
158                .map_or_else(|| file.encoding, DynEncoding::new),
159            options: self.options.unwrap_or_default(),
160        })
161    }
162}
163
/// Struct with the handle to the source .dbf file
/// Responsible for reading the content
// TODO Debug impl
#[derive(Clone)]
pub struct Reader<T: Read + Seek> {
    /// Where the data is read from
    source: T,
    /// Reader of the memo data, `None` when no memo source is attached
    memo_reader: Option<MemoReader<T>>,
    /// Header of the opened file
    header: Header,
    /// Description of the fields that make up a record
    fields_info: Vec<FieldInfo>,
    /// Encoding used to read strings
    encoding: DynEncoding,
    /// Options applied when reading field values
    options: ReadingOptions,
}
177
178impl<T: Read + Seek> Reader<T> {
179    /// Creates a new reader from the source.
180    ///
181    /// Reads the header and fields information as soon as its created.
182    ///
183    /// Creating a reader from a file path using the [from_path](struct.Reader.html#method.from_path) is the prefered
184    /// way of doing it as it wraps the file in a BufReader for performance.
185    ///
186    /// # Example
187    ///
188    /// ```
189    /// # fn main() -> Result<(), dbase::Error> {
190    /// let mut reader = dbase::Reader::from_path("tests/data/line.dbf")?;
191    /// let records = reader.read()?;
192    /// # Ok(())
193    /// # }
194    ///
195    /// ```
196    ///
197    /// ```
198    /// use std::fs::File;
199    /// # fn main() -> Result<(), dbase::Error> {
200    /// let f = File::open("tests/data/line.dbf").unwrap();
201    /// let reader = dbase::Reader::new(f)?;
202    /// # Ok(())
203    /// # }
204    /// ```
205    pub fn new(source: T) -> Result<Self, Error> {
206        let file = crate::File::open(source)?;
207        Ok(Self {
208            source: file.inner,
209            memo_reader: None,
210            header: file.header,
211            fields_info: file.fields_info.inner,
212            encoding: file.encoding,
213            options: ReadingOptions::default(),
214        })
215    }
216
217    /// Creates a new reader from the source and reads strings using the encoding provided.
218    ///
219    /// See [`Self::new`] for more information.
220    pub fn new_with_encoding<E: Encoding + 'static>(source: T, encoding: E) -> Result<Self, Error> {
221        let mut reader = Self::new(source)?;
222        reader.set_encoding(encoding);
223        Ok(reader)
224    }
225
    /// Replaces the encoding stored in the reader with `encoding`.
    pub fn set_encoding<E: Encoding + 'static>(&mut self, encoding: E) {
        self.encoding = DynEncoding::new(encoding);
    }
229
    /// Replaces the reading options stored in the reader with `options`.
    pub fn set_options(&mut self, options: ReadingOptions) {
        self.options = options;
    }
233
    /// Returns a reference to the header of the opened file
    pub fn header(&self) -> &Header {
        &self.header
    }
238
    /// Returns the description of the fields contained in the opened file
    pub fn fields(&self) -> &[FieldInfo] {
        &self.fields_info
    }
243
244    /// Creates an iterator of records of the type you want
245    pub fn iter_records_as<R: ReadableRecord>(&mut self) -> RecordIterator<'_, T, R> {
246        let record_size: usize = self
247            .fields_info
248            .iter()
249            .map(|i| i.field_length as usize)
250            .sum();
251        RecordIterator {
252            reader: self,
253            record_type: std::marker::PhantomData,
254            current_record: 0,
255            record_data_buffer: std::io::Cursor::new(vec![0u8; record_size]),
256            field_data_buffer: [0u8; 255],
257        }
258    }
259
260    /// Shortcut function to get an iterator over the [Records](struct.Record.html) in the file
261    pub fn iter_records(&mut self) -> RecordIterator<'_, T, Record> {
262        self.iter_records_as::<Record>()
263    }
264
265    /// Reads all the records of the file inside a `Vec`
266    pub fn read_as<R: ReadableRecord>(&mut self) -> Result<Vec<R>, Error> {
267        // We don't read the file terminator
268        self.iter_records_as::<R>()
269            .collect::<Result<Vec<R>, Error>>()
270    }
271
272    /// Make the `Reader` read the [Records](struct.Record.html)
273    ///
274    /// # Examples
275    ///
276    /// ```
277    /// use std::fs::File;
278    /// # fn main() -> Result<(), dbase::Error> {
279    /// let mut reader = dbase::Reader::from_path("tests/data/line.dbf")?;
280    /// let records = reader.read()?;
281    /// assert_eq!(records.len(), 1);
282    /// # Ok(())
283    /// # }
284    /// ```
285    pub fn read(&mut self) -> Result<Vec<Record>, Error> {
286        // We don't read the file terminator
287        self.iter_records().collect::<Result<Vec<Record>, Error>>()
288    }
289
290    /// Seek to the start of the record at `index`
291    pub fn seek(&mut self, index: usize) -> Result<(), Error> {
292        let offset = self.header.offset_to_first_record as usize
293            + (index * self.header.size_of_record as usize);
294        self.source
295            .seek(SeekFrom::Start(offset as u64))
296            .map_err(|err| Error::io_error(err, 0))?;
297        Ok(())
298    }
299
300    /// Consumes the reader, and returns the info that
301    /// allow to create a writer that would write a file
302    /// with the same structure.
303    ///
304    /// ```no_run
305    /// # fn main() -> Result<(), dbase::Error> {
306    /// let mut reader = dbase::Reader::from_path("some_file.dbf")?;
307    /// let records = reader.read()?;
308    /// let table_info = reader.into_table_info();
309    /// let writer_1 = dbase::TableWriterBuilder::from_table_info(table_info.clone())
310    ///         .build_with_file_dest("new_file_1.dbf");
311    ///
312    /// let writer_2 = dbase::TableWriterBuilder::from_table_info(table_info)
313    ///         .build_with_file_dest("new_file_2.dbf");
314    /// # Ok(())
315    /// # }
316    ///
317    /// ```
318    pub fn into_table_info(self) -> TableInfo {
319        TableInfo {
320            header: self.header,
321            fields_info: self.fields_info,
322            encoding: self.encoding,
323        }
324    }
325}
326
327impl Reader<BufReader<File>> {
328    /// Creates a new dbase Reader from a path
329    ///
330    /// # Example
331    ///
332    /// ```
333    /// # fn main() -> Result<(), dbase::Error> {
334    /// let reader = dbase::Reader::from_path("tests/data/line.dbf")?;
335    /// # Ok(())
336    /// # }
337    /// ```
338    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
339        let p = path.as_ref().to_owned();
340        let bufreader =
341            BufReader::new(File::open(path).map_err(|error| Error::io_error(error, 0))?);
342        let mut reader = Reader::new(bufreader)?;
343        let at_least_one_field_is_memo = reader
344            .fields_info
345            .iter()
346            .any(|f_info| f_info.field_type == FieldType::Memo);
347
348        if at_least_one_field_is_memo {
349            let memo_type = reader.header.file_type.supported_memo_type();
350            if let Some(mt) = memo_type {
351                let memo_path = match mt {
352                    MemoFileType::DbaseMemo | MemoFileType::DbaseMemo4 => p.with_extension("dbt"),
353                    MemoFileType::FoxBaseMemo => p.with_extension("fpt"),
354                };
355
356                let memo_file = File::open(memo_path).map_err(|error| Error {
357                    record_num: 0,
358                    field: None,
359                    kind: ErrorKind::ErrorOpeningMemoFile(error),
360                })?;
361
362                let memo_reader = MemoReader::new(mt, BufReader::new(memo_file))
363                    .map_err(|error| Error::io_error(error, 0))?;
364                reader.memo_reader = Some(memo_reader);
365            }
366        }
367        Ok(reader)
368    }
369
370    /// Creates a new dbase Reader from a path and reads string using the encoding provided.
371    pub fn from_path_with_encoding<P: AsRef<Path>, E: Encoding + 'static>(
372        path: P,
373        encoding: E,
374    ) -> Result<Self, Error> {
375        let mut reader = Self::from_path(path)?;
376        reader.encoding = DynEncoding::new(encoding);
377        Ok(reader)
378    }
379}
380
/// Simple struct to wrap together a value with the name
/// of the field it belongs to
///
/// `T` is the type of the value, e.g. [`FieldValue`] or the type a field
/// was converted into.
pub struct NamedValue<'a, T> {
    /// Reference to the field name the value belongs to
    pub name: &'a str,
    /// The value
    pub value: T,
}
389
/// Iterator over the fields in a dBase record
///
/// This iterator only iterates over the fields contained in one record.
///
/// When trying to read more fields than there are, an EndOfRecord error
/// will be returned.
pub struct FieldIterator<'a, Source: Read + Seek, MemoSource: Read + Seek> {
    /// The source from where we read the data
    pub(crate) source: &'a mut Source,
    /// The fields that make the records
    pub(crate) fields_info: std::iter::Peekable<std::slice::Iter<'a, FieldInfo>>,
    /// The source where the Memo field data is read
    pub(crate) memo_reader: &'a mut Option<MemoReader<MemoSource>>,
    /// Buffer where field data is stored
    pub(crate) field_data_buffer: &'a mut [u8; 255],
    /// The string encoding
    pub(crate) encoding: &'a DynEncoding,
    /// Options applied when reading field values
    pub(crate) options: ReadingOptions,
}
409
410impl<'a, Source: Read + Seek, MemoSource: Read + Seek> FieldIterator<'a, Source, MemoSource> {
411    /// Reads the next field and returns its name and value
412    pub fn read_next_field_impl(&mut self) -> Result<(&'a FieldInfo, FieldValue), FieldIOError> {
413        let field_info = self
414            .fields_info
415            .next()
416            .ok_or_else(FieldIOError::end_of_record)?;
417        Ok((field_info, self.read_field(field_info)?))
418    }
419
420    /// Reads the next field and returns its name and value
421    pub fn read_next_field(&mut self) -> Result<NamedValue<'a, FieldValue>, FieldIOError> {
422        self.read_next_field_impl()
423            .map(|(field_info, field_value)| NamedValue {
424                name: field_info.name(),
425                value: field_value,
426            })
427    }
428
429    /// Reads the next field and tries to convert into the requested type
430    /// using [TryFrom]
431    pub fn read_next_field_as<F>(&mut self) -> Result<NamedValue<'a, F>, FieldIOError>
432    where
433        F: TryFrom<FieldValue, Error = FieldConversionError>,
434    {
435        self.read_next_field_impl()
436            .and_then(|(field_info, field_value)| match F::try_from(field_value) {
437                Ok(v) => Ok(NamedValue {
438                    name: field_info.name(),
439                    value: v,
440                }),
441                Err(e) => Err(FieldIOError::new(e.into(), Some(field_info.to_owned()))),
442            })
443    }
444
445    /// Skips the next field of the record, useful if the field does not interest you
446    /// but the ones after do.
447    ///
448    /// Does nothing if the last field of the record was already skipped or read.
449    pub fn skip_next_field(&mut self) -> Result<(), FieldIOError> {
450        match self.fields_info.next() {
451            None => Ok(()),
452            Some(field_info) => self.skip_field(field_info),
453        }
454    }
455
456    /// Skips all the remaining field of the record
457    ///
458    /// used internally to make sure the data stream is at the right position
459    /// when we will start reading the next record
460    ///
461    /// Does nothing if the last field of the record was already skipped or read.
462    fn skip_remaining_fields(&mut self) -> Result<(), FieldIOError> {
463        while let Some(field_info) = self.fields_info.next() {
464            self.skip_field(field_info)?;
465        }
466        Ok(())
467    }
468
469    /// Reads the raw bytes of the next field without doing any filtering or trimming
470    #[cfg(feature = "serde")]
471    pub(crate) fn read_next_field_raw(&mut self) -> Result<Vec<u8>, FieldIOError> {
472        let field_info = self
473            .fields_info
474            .next()
475            .ok_or(FieldIOError::end_of_record())?;
476        let mut buf = vec![0u8; field_info.field_length as usize];
477        self.source.read_exact(&mut buf).map_err(|error| {
478            FieldIOError::new(ErrorKind::IoError(error), Some(field_info.to_owned()))
479        })?;
480        Ok(buf)
481    }
482
483    #[cfg(feature = "serde")]
484    pub(crate) fn peek_next_field(&mut self) -> Result<NamedValue<'a, FieldValue>, FieldIOError> {
485        let field_info = *self.fields_info.peek().ok_or(FieldIOError {
486            field: None,
487            kind: ErrorKind::EndOfRecord,
488        })?;
489        let value = self.read_field(field_info)?;
490        self.source
491            .seek(SeekFrom::Current(-i64::from(field_info.field_length)))
492            .map_err(|error| {
493                FieldIOError::new(ErrorKind::IoError(error), Some(field_info.to_owned()))
494            })?;
495
496        Ok(NamedValue {
497            name: field_info.name(),
498            value,
499        })
500    }
501
502    /// Advance the source to skip the field
503    fn skip_field(&mut self, field_info: &FieldInfo) -> Result<(), FieldIOError> {
504        self.source
505            .seek(SeekFrom::Current(i64::from(field_info.field_length)))
506            .map_err(|error| {
507                FieldIOError::new(ErrorKind::IoError(error), Some(field_info.to_owned()))
508            })?;
509        Ok(())
510    }
511
512    /// read the next field using the given info
513    fn read_field(&mut self, field_info: &'a FieldInfo) -> Result<FieldValue, FieldIOError> {
514        let field_data_buffer = &mut self.field_data_buffer[..field_info.length() as usize];
515        self.source.read_exact(field_data_buffer).unwrap();
516        match FieldValue::read_from(
517            field_data_buffer,
518            self.memo_reader,
519            field_info,
520            self.encoding,
521            self.options.character_trim,
522        ) {
523            Ok(value) => Ok(value),
524            Err(kind) => Err(FieldIOError {
525                field: Some(field_info.clone()),
526                kind,
527            }),
528        }
529    }
530}
531
532impl<'a, Source: Read + Seek, MemoSource: Read + Seek> Iterator
533    for FieldIterator<'a, Source, MemoSource>
534{
535    type Item = Result<NamedValue<'a, FieldValue>, FieldIOError>;
536
537    fn next(&mut self) -> Option<Self::Item> {
538        match self.read_next_field() {
539            Err(error) => match error.kind() {
540                ErrorKind::EndOfRecord => None,
541                _ => Some(Err(error)),
542            },
543            Ok(field_value) => Some(Ok(field_value)),
544        }
545    }
546}
547
// Marker impl: `next` returns `None` once the list of fields is exhausted,
// and keeps doing so on later calls.
impl<Source: Read + Seek, MemoSource: Read + Seek> FusedIterator
    for FieldIterator<'_, Source, MemoSource>
{
}
552
/// Iterator over records contained in the dBase
///
/// Each record is read as an `R`.
pub struct RecordIterator<'a, T: Read + Seek, R: ReadableRecord> {
    /// Reader the records are read from
    reader: &'a mut Reader<T>,
    /// Marker for the type of records produced
    record_type: std::marker::PhantomData<R>,
    /// Index of the next record, compared against the header's record count
    current_record: u32,
    /// Buffer holding the field data of the record being read
    record_data_buffer: std::io::Cursor<Vec<u8>>,
    /// Non-Memo field length is stored on a u8,
    /// so fields cannot exceed 255 bytes
    field_data_buffer: [u8; 255],
}
563
564impl<T: Read + Seek, R: ReadableRecord> Iterator for RecordIterator<'_, T, R> {
565    type Item = Result<R, Error>;
566
567    fn next(&mut self) -> Option<Self::Item> {
568        loop {
569            return if self.current_record >= self.reader.header.num_records {
570                None
571            } else {
572                let deletion_flag = DeletionFlag::read_from(&mut self.reader.source).ok()?;
573
574                if deletion_flag == DeletionFlag::Deleted {
575                    self.reader
576                        .source
577                        .seek(SeekFrom::Current(
578                            self.record_data_buffer.get_ref().len() as i64
579                        ))
580                        .ok()?;
581                    continue;
582                }
583
584                self.reader
585                    .source
586                    .read_exact(self.record_data_buffer.get_mut())
587                    .ok()?;
588                self.record_data_buffer.set_position(0);
589
590                let mut iter = FieldIterator {
591                    source: &mut self.record_data_buffer,
592                    fields_info: self.reader.fields_info.iter().peekable(),
593                    memo_reader: &mut self.reader.memo_reader,
594                    field_data_buffer: &mut self.field_data_buffer,
595                    encoding: &self.reader.encoding,
596                    options: self.reader.options,
597                };
598
599                let record = R::read_using(&mut iter)
600                    .and_then(|record| iter.skip_remaining_fields().and(Ok(record)))
601                    .map_err(|error| Error::new(error, self.current_record as usize));
602                self.current_record += 1;
603                Some(record)
604            };
605        }
606    }
607}
608
609/// One-liner to read the content of a .dbf file
610///
611/// # Example
612///
613/// ```
614/// let records = dbase::read("tests/data/line.dbf").unwrap();
615/// assert_eq!(records.len(), 1);
616/// ```
617pub fn read<P: AsRef<Path>>(path: P) -> Result<Vec<Record>, Error> {
618    let mut reader = Reader::from_path(path)?;
619    reader.read()
620}
621
#[cfg(test)]
mod test {
    use std::fs::File;
    use std::io::Seek;

    use super::*;

    /// After creating a reader, the source must be positioned right after
    /// the header, the field descriptors and the terminator byte.
    #[test]
    fn pos_after_reading() {
        let file = File::open("tests/data/line.dbf").unwrap();
        let mut reader = Reader::new(file).unwrap();
        let pos_after_reading = reader.source.stream_position().unwrap();

        let mut expected_pos = Header::SIZE + ((reader.fields_info.len()) * FieldInfo::SIZE);
        // Don't forget terminator
        expected_pos += size_of::<u8>();

        assert_eq!(pos_after_reading, expected_pos as u64);
    }
}