dbf/
lib.rs

1#[macro_use]
2extern crate nom;
3
4
5use std::fmt;
6use nom::*;
7use std::path::Path;
8use std::fs::File;
9use std::io::{Read, Seek, SeekFrom};
10use std::collections::HashMap;
11
/// Reads exactly `length` bytes from `input`, starting at the absolute byte
/// offset `start`.
///
/// # Errors
///
/// Returns `"couldn't seek"` when positioning the reader at `start` fails and
/// `"Couldn't read bytes"` when `length` bytes cannot be read from there
/// (for example because the input ends early).
fn read_bytes<R: Read+Seek>(input: &mut R, start: u64, length: usize) -> Result<Vec<u8>, String> {
    input.seek(SeekFrom::Start(start)).map_err(|_| "couldn't seek".to_string())?;

    // Allocate only once the seek succeeded; `read_exact` fills the whole buffer or fails.
    let mut res_vec = vec![0; length];
    input.read_exact(&mut res_vec).map_err(|_| "Couldn't read bytes".to_string())?;
    Ok(res_vec)
}
21
22named!(parse_header<(i32, i16, i16)>,
23   do_parse!(
24       take!(1) >>   // level?
25       take!(3) >>   // date last modified
26       num_recs: le_i32         >>
27       bytes_in_header: le_i16  >>
28       bytes_in_rec: le_i16     >>
29       take!(2) >>  // res. fill w/ zero
30       take!(1) >>  // flag: incomplete transaction
31       take!(1) >>  // encryption flag
32       take!(12) >> // res. multi-user proc.
33       take!(1) >>  // prod. mdx flag
34       take!(1) >>  // lang. drv. id
35       take!(2) >>  // res.
36
37       ( (num_recs, bytes_in_header, bytes_in_rec) )
38   )
39);
40
/// Decodes the raw, NUL-padded field-name bytes of a field descriptor.
///
/// The name ends at the first NUL byte; anything after that terminator is
/// ignored, so padding (or stray bytes left behind the terminator) never ends
/// up in the name. Bytes that are not valid UTF-8 are replaced with U+FFFD
/// instead of causing a panic.
fn parse_field_name(i: &[u8]) -> String {
    // TODO this accepts UTF8, when it should only accept ASCII
    let end = i.iter().position(|&b| b == 0).unwrap_or(i.len());
    String::from_utf8_lossy(&i[..end]).into_owned()
}
45
46named!(parse_field_descriptor<FieldHeader>,
47    do_parse!(
48        // FIXME use convert name to String here
49        name: take!(11) >>
50        field_type: take!(1) >>
51        take!(4) >>      // res.
52        field_length: be_u8 >>
53        decimal_count: be_u8 >>
54        take!(2) >>      // work area ID
55        take!(1) >>      // ex.
56        take!(10)>>      // res.
57        take!(1) >>      // prod. mdx flag
58        ({
59            let field_type = ::std::str::from_utf8(field_type).unwrap().to_string().remove(0);
60            let field_type = match field_type {
61                'N' => FieldType::Numeric,
62                'C' => FieldType::Character,
63                _ => { panic!("Unknown char {:?}", field_type) },
64            };
65
66            FieldHeader{
67                name: parse_field_name(name),
68                field_type: field_type,
69                field_length: field_length,
70                decimal_count: decimal_count,
71           }
72        })
73    )
74);
75
76#[derive(Debug)]
77pub struct DbfFile<R: Read+Seek> {
78    _dbf_file_handle: R,
79    _fields: Vec<FieldHeader>,
80    _num_recs: u32,
81    _bytes_in_rec: u16,
82}
83
84pub struct DbfRecordIterator<R: Read+Seek> {
85    _dbf_file: DbfFile<R>,
86    _next_rec: u32,
87}
88
89
/// The data type of a column, as declared by its field descriptor.
///
/// The type is a plain tag, so it is `Copy` and can be compared with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldType {
    /// Text column (type character `C`).
    Character,
    /// Number column (type character `N`).
    Numeric,
    // FIXME more types
}
96
/// A single decoded value of a record.
///
/// Values can be cloned and compared with `==` (numeric values follow the
/// usual `f64` comparison rules).
#[derive(Debug, Clone, PartialEq)]
pub enum Field {
    /// Value of a character column.
    Character(String),
    /// Value of a numeric column.
    Numeric(f64),
    /// The column holds no value in this record.
    Null,
    // FIXME more types
}
104
105impl fmt::Display for Field {
106    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
107        match self {
108            &Field::Character(ref s) => write!(f, "{}", s),
109            &Field::Numeric(ref n) => write!(f, "{}", n),
110            &Field::Null => write!(f, "(NULL)"),
111        }
112    }
113}
114
115
116#[derive(Debug, Clone)]
117pub struct FieldHeader {
118    pub name: String,
119    pub field_type: FieldType,
120    pub field_length: u8,
121    pub decimal_count: u8,
122}
123
124pub type Record = HashMap<String, Field>;
125
126impl DbfFile<File> {
127    pub fn open_file(filename: &Path) -> Self {
128        let dbf_file = File::open(filename).unwrap();
129        DbfFile::open(dbf_file)
130    }
131}
132
133impl<R> DbfFile<R> where R: Read+Seek {
134    pub fn open(mut dbf_file: R) -> Self where R: Read+Seek {
135        let header_bytes = read_bytes(&mut dbf_file, 0, 32).unwrap();
136        let (num_recs, bytes_in_header, bytes_in_rec) = parse_header(&header_bytes).to_result().unwrap();
137        // -1 is for the \x0D separator
138        // last -1 is maybe because of an off by one error? FIXME
139        let num_headers = (bytes_in_header - 1) / 32 - 1;
140
141        let fields: Vec<_> = read_bytes(&mut dbf_file, 32, (num_headers*32) as usize).unwrap().chunks(32).map(|b| parse_field_descriptor(b).to_result().unwrap()).collect();
142
143        DbfFile{ _dbf_file_handle: dbf_file, _fields: fields, _num_recs: num_recs as u32, _bytes_in_rec: bytes_in_rec as u16 }
144    }
145
146    pub fn record(&mut self, rec_id: u32) -> Option<Record> {
147        if rec_id >= self._num_recs {
148            return None;
149        }
150
151        let header_length = (32 + 32 * self._fields.len() + 2) as u64;
152        let bytes = read_bytes(&mut self._dbf_file_handle, header_length + (rec_id as u64*self._bytes_in_rec as u64), self._bytes_in_rec as usize).or_else(|e| {
153            if rec_id == self._num_recs - 1 {
154                // If there's an error and it's the last record, then for some reason it works if
155                // we take one byte less.
156                read_bytes(&mut self._dbf_file_handle, header_length + (rec_id as u64*self._bytes_in_rec as u64), self._bytes_in_rec as usize - 1)
157            } else {
158                Err(e)
159            }
160        }).unwrap();
161        let mut offset: usize = 0;
162        let mut fields = HashMap::with_capacity(self._fields.len());
163
164        for field in self._fields.iter() {
165            let this_field_bytes: Vec<_> = bytes.iter().skip(offset).take(field.field_length as usize).map(|x| x.clone()).collect();
166            offset = offset + field.field_length as usize;
167
168            let this_field_ascii = String::from_utf8(this_field_bytes).unwrap().trim().to_owned();
169
170            // Is this field a Character
171            // FIXME gotta be a better way to do this
172            let is_char = match field.field_type { FieldType::Character => true, _ => false };
173
174            // Spec says that a string '*' means NULL, but empty strings are also viewed as null by
175            // some software
176            let is_null = this_field_ascii.chars().nth(0) == Some('*') || (this_field_ascii.len() == 0 && is_char );
177
178            let value = if is_null {
179                Field::Null
180            } else {
181                match field.field_type {
182                    FieldType::Character => Field::Character(this_field_ascii),
183                    FieldType::Numeric => Field::Numeric(this_field_ascii.parse().unwrap()),
184                }
185            };
186
187            fields.insert(field.name.clone(), value);
188        }
189
190        Some(fields)
191    }
192
193    pub fn records(self) -> DbfRecordIterator<R> {
194        DbfRecordIterator{ _dbf_file: self, _next_rec: 0 }
195    }
196
197    pub fn num_records(&self) -> u32 {
198        return self._num_recs
199    }
200
201    pub fn headers(&self) -> &Vec<FieldHeader> {
202        &self._fields
203    }
204}
205
206impl<R> DbfRecordIterator<R> where R: Read+Seek {
207    pub fn into_inner(self) -> DbfFile<R> {
208        self._dbf_file
209    }
210}
211
212impl<R> Iterator for DbfRecordIterator<R> where R: Read+Seek {
213    type Item = Record;
214
215    fn next(&mut self) -> Option<Record> {
216        if self._next_rec >= self._dbf_file._num_recs {
217            None
218        } else {
219            let rec = self._dbf_file.record(self._next_rec);
220            self._next_rec = self._next_rec + 1;
221            rec
222        }
223    }
224
225    fn size_hint(&self) -> (usize, Option<usize>) {
226        (self._dbf_file._num_recs as usize, Some(self._dbf_file._num_recs as usize))
227    }
228}