1use std::convert::TryFrom;
4use std::fs::File;
5use std::io::{BufReader, Read, Seek, SeekFrom};
6use std::iter::FusedIterator;
7use std::path::Path;
8
9use crate::encoding::DynEncoding;
10use crate::error::{Error, ErrorKind, FieldIOError};
11use crate::field::types::{FieldType, FieldValue, TrimOption};
12use crate::field::{DeletionFlag, FieldInfo};
13use crate::header::Header;
14use crate::memo::{MemoFileType, MemoReader};
15use crate::{Encoding, FieldConversionError, Record};
16
/// Byte value `0x0D`, used as a terminator marker.
// NOTE(review): presumably the byte that closes the field-descriptor array
// of the file header; its use is not visible in this file, so confirm.
pub(crate) const TERMINATOR_VALUE: u8 = 0x0D;

/// Size (`263`) of a "backlink" area.
// NOTE(review): presumably counted in bytes and specific to some file
// flavours; its use is not visible in this file, so confirm.
pub(crate) const BACKLINK_SIZE: u16 = 263;
21
22pub trait ReadableRecord: Sized {
30 fn read_using<Source, MemoSource>(
33 field_iterator: &mut FieldIterator<Source, MemoSource>,
34 ) -> Result<Self, FieldIOError>
35 where
36 Source: Read + Seek,
37 MemoSource: Read + Seek;
38}
39
/// Table metadata detached from the data source it was read from.
///
/// Produced by `Reader::into_table_info`, which moves the reader's header,
/// field descriptions and encoding into this struct.
#[derive(Clone)]
pub struct TableInfo {
    /// Header of the table.
    pub(crate) header: Header,
    /// Description of every field of the table, in file order.
    pub(crate) fields_info: Vec<FieldInfo>,
    /// Encoding that was in use by the reader.
    pub(crate) encoding: DynEncoding,
}
51
/// Options that tune how field values are read.
///
/// The default value trims with `TrimOption::BeginEnd`.
#[derive(Copy, Clone, Debug)]
pub struct ReadingOptions {
    /// Trimming passed to `FieldValue::read_from` when fields are read.
    // NOTE(review): by its name this only affects character fields — confirm
    // against `FieldValue::read_from`.
    pub(crate) character_trim: TrimOption,
}
57
58impl Default for ReadingOptions {
59 fn default() -> Self {
60 Self {
61 character_trim: TrimOption::BeginEnd,
62 }
63 }
64}
65
66impl ReadingOptions {
67 pub fn character_trim(mut self, trim_option: TrimOption) -> Self {
71 self.character_trim = trim_option;
72 self
73 }
74}
75
/// Builder that assembles a [`Reader`] from a data source plus optional
/// memo source, encoding and reading options.
///
/// The memo source must have the same type `T` as the main source.
pub struct ReaderBuilder<T: Read + Seek, E: Encoding + 'static> {
    /// Source the table (header, field descriptions, records) is read from.
    source: T,
    /// Optional source of the memo data.
    memo_source: Option<T>,
    /// Encoding override; when `None`, `build` keeps the encoding found
    /// while opening the file.
    encoding: Option<E>,
    /// Reading options; when `None`, `build` uses `ReadingOptions::default()`.
    options: Option<ReadingOptions>,
}
105
106impl<T: Read + Seek, E: Encoding + 'static> ReaderBuilder<T, E> {
107 pub fn new(source: T) -> Self {
108 Self {
109 source,
110 memo_source: None,
111 encoding: None,
112 options: None,
113 }
114 }
115
116 pub fn with_memo(mut self, memo_source: T) -> Self {
117 self.memo_source = Some(memo_source);
118
119 self
120 }
121
122 pub fn with_encoding(mut self, encoding: E) -> Self {
123 self.encoding = Some(encoding);
124
125 self
126 }
127
128 pub fn with_options(mut self, options: ReadingOptions) -> Self {
129 self.options = Some(options);
130
131 self
132 }
133
134 pub fn build(self) -> Result<Reader<T>, Error> {
135 let file = crate::File::open(self.source)?;
136
137 let memo_reader = if let Some(memo_source) = self.memo_source {
138 let memo_type = file.header.file_type.supported_memo_type();
139 if let Some(mt) = memo_type {
140 let memo_reader =
141 MemoReader::new(mt, memo_source).map_err(|error| Error::io_error(error, 0))?;
142
143 Some(memo_reader)
144 } else {
145 None
146 }
147 } else {
148 None
149 };
150
151 Ok(Reader {
152 source: file.inner,
153 memo_reader,
154 header: file.header,
155 fields_info: file.fields_info.inner,
156 encoding: self
157 .encoding
158 .map_or_else(|| file.encoding, DynEncoding::new),
159 options: self.options.unwrap_or_default(),
160 })
161 }
162}
163
/// Reads the records of a table from a source implementing `Read + Seek`.
#[derive(Clone)]
pub struct Reader<T: Read + Seek> {
    /// Source the records are read from.
    source: T,
    /// Reader for memo data, when one is available.
    memo_reader: Option<MemoReader<T>>,
    /// Header of the table.
    header: Header,
    /// Description of every field of the table, in file order.
    fields_info: Vec<FieldInfo>,
    /// Encoding handed to the field readers.
    encoding: DynEncoding,
    /// Options handed to the field readers.
    options: ReadingOptions,
}
177
178impl<T: Read + Seek> Reader<T> {
179 pub fn new(source: T) -> Result<Self, Error> {
206 let file = crate::File::open(source)?;
207 Ok(Self {
208 source: file.inner,
209 memo_reader: None,
210 header: file.header,
211 fields_info: file.fields_info.inner,
212 encoding: file.encoding,
213 options: ReadingOptions::default(),
214 })
215 }
216
217 pub fn new_with_encoding<E: Encoding + 'static>(source: T, encoding: E) -> Result<Self, Error> {
221 let mut reader = Self::new(source)?;
222 reader.set_encoding(encoding);
223 Ok(reader)
224 }
225
226 pub fn set_encoding<E: Encoding + 'static>(&mut self, encoding: E) {
227 self.encoding = DynEncoding::new(encoding);
228 }
229
230 pub fn set_options(&mut self, options: ReadingOptions) {
231 self.options = options;
232 }
233
234 pub fn header(&self) -> &Header {
236 &self.header
237 }
238
239 pub fn fields(&self) -> &[FieldInfo] {
241 &self.fields_info
242 }
243
244 pub fn iter_records_as<R: ReadableRecord>(&mut self) -> RecordIterator<'_, T, R> {
246 let record_size: usize = self
247 .fields_info
248 .iter()
249 .map(|i| i.field_length as usize)
250 .sum();
251 RecordIterator {
252 reader: self,
253 record_type: std::marker::PhantomData,
254 current_record: 0,
255 record_data_buffer: std::io::Cursor::new(vec![0u8; record_size]),
256 field_data_buffer: [0u8; 255],
257 }
258 }
259
260 pub fn iter_records(&mut self) -> RecordIterator<'_, T, Record> {
262 self.iter_records_as::<Record>()
263 }
264
265 pub fn read_as<R: ReadableRecord>(&mut self) -> Result<Vec<R>, Error> {
267 self.iter_records_as::<R>()
269 .collect::<Result<Vec<R>, Error>>()
270 }
271
272 pub fn read(&mut self) -> Result<Vec<Record>, Error> {
286 self.iter_records().collect::<Result<Vec<Record>, Error>>()
288 }
289
290 pub fn seek(&mut self, index: usize) -> Result<(), Error> {
292 let offset = self.header.offset_to_first_record as usize
293 + (index * self.header.size_of_record as usize);
294 self.source
295 .seek(SeekFrom::Start(offset as u64))
296 .map_err(|err| Error::io_error(err, 0))?;
297 Ok(())
298 }
299
300 pub fn into_table_info(self) -> TableInfo {
319 TableInfo {
320 header: self.header,
321 fields_info: self.fields_info,
322 encoding: self.encoding,
323 }
324 }
325}
326
327impl Reader<BufReader<File>> {
328 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
339 let p = path.as_ref().to_owned();
340 let bufreader =
341 BufReader::new(File::open(path).map_err(|error| Error::io_error(error, 0))?);
342 let mut reader = Reader::new(bufreader)?;
343 let at_least_one_field_is_memo = reader
344 .fields_info
345 .iter()
346 .any(|f_info| f_info.field_type == FieldType::Memo);
347
348 if at_least_one_field_is_memo {
349 let memo_type = reader.header.file_type.supported_memo_type();
350 if let Some(mt) = memo_type {
351 let memo_path = match mt {
352 MemoFileType::DbaseMemo | MemoFileType::DbaseMemo4 => p.with_extension("dbt"),
353 MemoFileType::FoxBaseMemo => p.with_extension("fpt"),
354 };
355
356 let memo_file = File::open(memo_path).map_err(|error| Error {
357 record_num: 0,
358 field: None,
359 kind: ErrorKind::ErrorOpeningMemoFile(error),
360 })?;
361
362 let memo_reader = MemoReader::new(mt, BufReader::new(memo_file))
363 .map_err(|error| Error::io_error(error, 0))?;
364 reader.memo_reader = Some(memo_reader);
365 }
366 }
367 Ok(reader)
368 }
369
370 pub fn from_path_with_encoding<P: AsRef<Path>, E: Encoding + 'static>(
372 path: P,
373 encoding: E,
374 ) -> Result<Self, Error> {
375 let mut reader = Self::from_path(path)?;
376 reader.encoding = DynEncoding::new(encoding);
377 Ok(reader)
378 }
379}
380
/// A value paired with the name of the field it was read from.
pub struct NamedValue<'a, T> {
    /// Name of the field, borrowed from its field description.
    pub name: &'a str,
    /// The value that was read (or converted) for that field.
    pub value: T,
}
389
/// Iterator over the fields of one record.
///
/// Each step takes the next field description and reads that field's bytes
/// from `source`.
pub struct FieldIterator<'a, Source: Read + Seek, MemoSource: Read + Seek> {
    /// Source positioned on the data of the record being read.
    pub(crate) source: &'a mut Source,
    /// Descriptions of the fields that have not been read or skipped yet.
    pub(crate) fields_info: std::iter::Peekable<std::slice::Iter<'a, FieldInfo>>,
    /// Memo reader handed to `FieldValue::read_from`, if one is available.
    pub(crate) memo_reader: &'a mut Option<MemoReader<MemoSource>>,
    /// Scratch buffer the raw bytes of a field are read into.
    pub(crate) field_data_buffer: &'a mut [u8; 255],
    /// Encoding handed to `FieldValue::read_from`.
    pub(crate) encoding: &'a DynEncoding,
    /// Options applied when reading field values.
    pub(crate) options: ReadingOptions,
}
409
410impl<'a, Source: Read + Seek, MemoSource: Read + Seek> FieldIterator<'a, Source, MemoSource> {
411 pub fn read_next_field_impl(&mut self) -> Result<(&'a FieldInfo, FieldValue), FieldIOError> {
413 let field_info = self
414 .fields_info
415 .next()
416 .ok_or_else(FieldIOError::end_of_record)?;
417 Ok((field_info, self.read_field(field_info)?))
418 }
419
420 pub fn read_next_field(&mut self) -> Result<NamedValue<'a, FieldValue>, FieldIOError> {
422 self.read_next_field_impl()
423 .map(|(field_info, field_value)| NamedValue {
424 name: field_info.name(),
425 value: field_value,
426 })
427 }
428
429 pub fn read_next_field_as<F>(&mut self) -> Result<NamedValue<'a, F>, FieldIOError>
432 where
433 F: TryFrom<FieldValue, Error = FieldConversionError>,
434 {
435 self.read_next_field_impl()
436 .and_then(|(field_info, field_value)| match F::try_from(field_value) {
437 Ok(v) => Ok(NamedValue {
438 name: field_info.name(),
439 value: v,
440 }),
441 Err(e) => Err(FieldIOError::new(e.into(), Some(field_info.to_owned()))),
442 })
443 }
444
445 pub fn skip_next_field(&mut self) -> Result<(), FieldIOError> {
450 match self.fields_info.next() {
451 None => Ok(()),
452 Some(field_info) => self.skip_field(field_info),
453 }
454 }
455
456 fn skip_remaining_fields(&mut self) -> Result<(), FieldIOError> {
463 while let Some(field_info) = self.fields_info.next() {
464 self.skip_field(field_info)?;
465 }
466 Ok(())
467 }
468
469 #[cfg(feature = "serde")]
471 pub(crate) fn read_next_field_raw(&mut self) -> Result<Vec<u8>, FieldIOError> {
472 let field_info = self
473 .fields_info
474 .next()
475 .ok_or(FieldIOError::end_of_record())?;
476 let mut buf = vec![0u8; field_info.field_length as usize];
477 self.source.read_exact(&mut buf).map_err(|error| {
478 FieldIOError::new(ErrorKind::IoError(error), Some(field_info.to_owned()))
479 })?;
480 Ok(buf)
481 }
482
483 #[cfg(feature = "serde")]
484 pub(crate) fn peek_next_field(&mut self) -> Result<NamedValue<'a, FieldValue>, FieldIOError> {
485 let field_info = *self.fields_info.peek().ok_or(FieldIOError {
486 field: None,
487 kind: ErrorKind::EndOfRecord,
488 })?;
489 let value = self.read_field(field_info)?;
490 self.source
491 .seek(SeekFrom::Current(-i64::from(field_info.field_length)))
492 .map_err(|error| {
493 FieldIOError::new(ErrorKind::IoError(error), Some(field_info.to_owned()))
494 })?;
495
496 Ok(NamedValue {
497 name: field_info.name(),
498 value,
499 })
500 }
501
502 fn skip_field(&mut self, field_info: &FieldInfo) -> Result<(), FieldIOError> {
504 self.source
505 .seek(SeekFrom::Current(i64::from(field_info.field_length)))
506 .map_err(|error| {
507 FieldIOError::new(ErrorKind::IoError(error), Some(field_info.to_owned()))
508 })?;
509 Ok(())
510 }
511
512 fn read_field(&mut self, field_info: &'a FieldInfo) -> Result<FieldValue, FieldIOError> {
514 let field_data_buffer = &mut self.field_data_buffer[..field_info.length() as usize];
515 self.source.read_exact(field_data_buffer).unwrap();
516 match FieldValue::read_from(
517 field_data_buffer,
518 self.memo_reader,
519 field_info,
520 self.encoding,
521 self.options.character_trim,
522 ) {
523 Ok(value) => Ok(value),
524 Err(kind) => Err(FieldIOError {
525 field: Some(field_info.clone()),
526 kind,
527 }),
528 }
529 }
530}
531
532impl<'a, Source: Read + Seek, MemoSource: Read + Seek> Iterator
533 for FieldIterator<'a, Source, MemoSource>
534{
535 type Item = Result<NamedValue<'a, FieldValue>, FieldIOError>;
536
537 fn next(&mut self) -> Option<Self::Item> {
538 match self.read_next_field() {
539 Err(error) => match error.kind() {
540 ErrorKind::EndOfRecord => None,
541 _ => Some(Err(error)),
542 },
543 Ok(field_value) => Some(Ok(field_value)),
544 }
545 }
546}
547
// Marker impl with no methods: `FusedIterator` declares that once `next`
// has returned `None` it will keep returning `None`.
impl<Source: Read + Seek, MemoSource: Read + Seek> FusedIterator
    for FieldIterator<'_, Source, MemoSource>
{
}
552
/// Iterator over the records of a [`Reader`], yielding each one as an `R`.
///
/// Created by `Reader::iter_records_as` and `Reader::iter_records`.
pub struct RecordIterator<'a, T: Read + Seek, R: ReadableRecord> {
    /// Reader the records are pulled from.
    reader: &'a mut Reader<T>,
    /// Ties the iterator to the record type it yields; holds no data.
    record_type: std::marker::PhantomData<R>,
    /// Counter compared against the header's `num_records` to know when to
    /// stop.
    current_record: u32,
    /// Holds the bytes of the record being decoded; sized by the creator to
    /// the sum of all field lengths.
    record_data_buffer: std::io::Cursor<Vec<u8>>,
    /// Scratch buffer lent to the `FieldIterator` for single-field reads.
    field_data_buffer: [u8; 255],
}
563
564impl<T: Read + Seek, R: ReadableRecord> Iterator for RecordIterator<'_, T, R> {
565 type Item = Result<R, Error>;
566
567 fn next(&mut self) -> Option<Self::Item> {
568 loop {
569 return if self.current_record >= self.reader.header.num_records {
570 None
571 } else {
572 let deletion_flag = DeletionFlag::read_from(&mut self.reader.source).ok()?;
573
574 if deletion_flag == DeletionFlag::Deleted {
575 self.reader
576 .source
577 .seek(SeekFrom::Current(
578 self.record_data_buffer.get_ref().len() as i64
579 ))
580 .ok()?;
581 continue;
582 }
583
584 self.reader
585 .source
586 .read_exact(self.record_data_buffer.get_mut())
587 .ok()?;
588 self.record_data_buffer.set_position(0);
589
590 let mut iter = FieldIterator {
591 source: &mut self.record_data_buffer,
592 fields_info: self.reader.fields_info.iter().peekable(),
593 memo_reader: &mut self.reader.memo_reader,
594 field_data_buffer: &mut self.field_data_buffer,
595 encoding: &self.reader.encoding,
596 options: self.reader.options,
597 };
598
599 let record = R::read_using(&mut iter)
600 .and_then(|record| iter.skip_remaining_fields().and(Ok(record)))
601 .map_err(|error| Error::new(error, self.current_record as usize));
602 self.current_record += 1;
603 Some(record)
604 };
605 }
606 }
607}
608
609pub fn read<P: AsRef<Path>>(path: P) -> Result<Vec<Record>, Error> {
618 let mut reader = Reader::from_path(path)?;
619 reader.read()
620}
621
#[cfg(test)]
mod test {
    use std::fs::File;
    use std::io::Seek;

    use super::*;

    /// Checks where the source is positioned right after `Reader::new`
    /// returns.
    #[test]
    fn pos_after_reading() {
        let file = File::open("tests/data/line.dbf").unwrap();
        let mut reader = Reader::new(file).unwrap();
        let pos_after_reading = reader.source.stream_position().unwrap();

        // Expected position: the header plus one descriptor per field...
        let mut expected_pos = Header::SIZE + ((reader.fields_info.len()) * FieldInfo::SIZE);
        // ...plus one more byte.
        // NOTE(review): presumably the terminator byte that follows the field
        // descriptors — confirm.
        expected_pos += size_of::<u8>();

        assert_eq!(pos_after_reading, expected_pos as u64);
    }
}