1#[macro_use]
2extern crate nom;
3
4
5use std::fmt;
6use nom::*;
7use std::path::Path;
8use std::fs::File;
9use std::io::{Read, Seek, SeekFrom};
10use std::collections::HashMap;
11
/// Reads exactly `length` bytes from `input`, starting at the absolute byte
/// offset `start`.
///
/// # Errors
///
/// Returns `"couldn't seek"` when the reader cannot be positioned at `start`,
/// and `"Couldn't read bytes"` when `length` bytes cannot be read from there
/// (for example because the input ends first). The underlying I/O error is
/// discarded in both cases.
fn read_bytes<R: Read + Seek>(input: &mut R, start: u64, length: usize) -> Result<Vec<u8>, String> {
    let mut res_vec = vec![0; length];

    input
        .seek(SeekFrom::Start(start))
        .map_err(|_| "couldn't seek".to_string())?;
    input
        .read_exact(&mut res_vec)
        .map_err(|_| "Couldn't read bytes".to_string())?;

    Ok(res_vec)
}
21
22named!(parse_header<(i32, i16, i16)>,
23 do_parse!(
24 take!(1) >> take!(3) >> num_recs: le_i32 >>
27 bytes_in_header: le_i16 >>
28 bytes_in_rec: le_i16 >>
29 take!(2) >> take!(1) >> take!(1) >> take!(12) >> take!(1) >> take!(1) >> take!(2) >> ( (num_recs, bytes_in_header, bytes_in_rec) )
38 )
39);
40
/// Decodes the raw, NUL-padded name slot of a field descriptor.
///
/// The name ends at the first NUL byte; anything after that terminator is
/// ignored, so padding that is not all zeroes cannot leak into the name.
/// Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
/// aborting the whole parse with a panic.
fn parse_field_name(i: &[u8]) -> String {
    let end = i.iter().position(|&b| b == 0).unwrap_or(i.len());
    String::from_utf8_lossy(&i[..end]).into_owned()
}
45
46named!(parse_field_descriptor<FieldHeader>,
47 do_parse!(
48 name: take!(11) >>
50 field_type: take!(1) >>
51 take!(4) >> field_length: be_u8 >>
53 decimal_count: be_u8 >>
54 take!(2) >> take!(1) >> take!(10)>> take!(1) >> ({
59 let field_type = ::std::str::from_utf8(field_type).unwrap().to_string().remove(0);
60 let field_type = match field_type {
61 'N' => FieldType::Numeric,
62 'C' => FieldType::Character,
63 _ => { panic!("Unknown char {:?}", field_type) },
64 };
65
66 FieldHeader{
67 name: parse_field_name(name),
68 field_type: field_type,
69 field_length: field_length,
70 decimal_count: decimal_count,
71 }
72 })
73 )
74);
75
76#[derive(Debug)]
77pub struct DbfFile<R: Read+Seek> {
78 _dbf_file_handle: R,
79 _fields: Vec<FieldHeader>,
80 _num_recs: u32,
81 _bytes_in_rec: u16,
82}
83
84pub struct DbfRecordIterator<R: Read+Seek> {
85 _dbf_file: DbfFile<R>,
86 _next_rec: u32,
87}
88
89
/// Kind of data a column holds, as declared by its field descriptor.
#[derive(Debug, Clone)]
pub enum FieldType {
    /// Text column (descriptor type code `'C'`).
    Character,
    /// Number column (descriptor type code `'N'`).
    Numeric,
}
96
/// A single decoded cell of a record.
///
/// `Clone` and `PartialEq` are derived so that whole records (maps of
/// `Field`s) can be duplicated and compared.
#[derive(Debug, Clone, PartialEq)]
pub enum Field {
    /// Text value of a character column.
    Character(String),
    /// Value of a numeric column.
    Numeric(f64),
    /// The cell holds no value.
    Null,
}

impl fmt::Display for Field {
    /// Writes the bare value; a `Null` cell is rendered as `(NULL)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Field::Character(ref s) => write!(f, "{}", s),
            Field::Numeric(ref n) => write!(f, "{}", n),
            Field::Null => write!(f, "(NULL)"),
        }
    }
}
114
115
116#[derive(Debug, Clone)]
117pub struct FieldHeader {
118 pub name: String,
119 pub field_type: FieldType,
120 pub field_length: u8,
121 pub decimal_count: u8,
122}
123
124pub type Record = HashMap<String, Field>;
125
126impl DbfFile<File> {
127 pub fn open_file(filename: &Path) -> Self {
128 let dbf_file = File::open(filename).unwrap();
129 DbfFile::open(dbf_file)
130 }
131}
132
133impl<R> DbfFile<R> where R: Read+Seek {
134 pub fn open(mut dbf_file: R) -> Self where R: Read+Seek {
135 let header_bytes = read_bytes(&mut dbf_file, 0, 32).unwrap();
136 let (num_recs, bytes_in_header, bytes_in_rec) = parse_header(&header_bytes).to_result().unwrap();
137 let num_headers = (bytes_in_header - 1) / 32 - 1;
140
141 let fields: Vec<_> = read_bytes(&mut dbf_file, 32, (num_headers*32) as usize).unwrap().chunks(32).map(|b| parse_field_descriptor(b).to_result().unwrap()).collect();
142
143 DbfFile{ _dbf_file_handle: dbf_file, _fields: fields, _num_recs: num_recs as u32, _bytes_in_rec: bytes_in_rec as u16 }
144 }
145
146 pub fn record(&mut self, rec_id: u32) -> Option<Record> {
147 if rec_id >= self._num_recs {
148 return None;
149 }
150
151 let header_length = (32 + 32 * self._fields.len() + 2) as u64;
152 let bytes = read_bytes(&mut self._dbf_file_handle, header_length + (rec_id as u64*self._bytes_in_rec as u64), self._bytes_in_rec as usize).or_else(|e| {
153 if rec_id == self._num_recs - 1 {
154 read_bytes(&mut self._dbf_file_handle, header_length + (rec_id as u64*self._bytes_in_rec as u64), self._bytes_in_rec as usize - 1)
157 } else {
158 Err(e)
159 }
160 }).unwrap();
161 let mut offset: usize = 0;
162 let mut fields = HashMap::with_capacity(self._fields.len());
163
164 for field in self._fields.iter() {
165 let this_field_bytes: Vec<_> = bytes.iter().skip(offset).take(field.field_length as usize).map(|x| x.clone()).collect();
166 offset = offset + field.field_length as usize;
167
168 let this_field_ascii = String::from_utf8(this_field_bytes).unwrap().trim().to_owned();
169
170 let is_char = match field.field_type { FieldType::Character => true, _ => false };
173
174 let is_null = this_field_ascii.chars().nth(0) == Some('*') || (this_field_ascii.len() == 0 && is_char );
177
178 let value = if is_null {
179 Field::Null
180 } else {
181 match field.field_type {
182 FieldType::Character => Field::Character(this_field_ascii),
183 FieldType::Numeric => Field::Numeric(this_field_ascii.parse().unwrap()),
184 }
185 };
186
187 fields.insert(field.name.clone(), value);
188 }
189
190 Some(fields)
191 }
192
193 pub fn records(self) -> DbfRecordIterator<R> {
194 DbfRecordIterator{ _dbf_file: self, _next_rec: 0 }
195 }
196
197 pub fn num_records(&self) -> u32 {
198 return self._num_recs
199 }
200
201 pub fn headers(&self) -> &Vec<FieldHeader> {
202 &self._fields
203 }
204}
205
206impl<R> DbfRecordIterator<R> where R: Read+Seek {
207 pub fn into_inner(self) -> DbfFile<R> {
208 self._dbf_file
209 }
210}
211
212impl<R> Iterator for DbfRecordIterator<R> where R: Read+Seek {
213 type Item = Record;
214
215 fn next(&mut self) -> Option<Record> {
216 if self._next_rec >= self._dbf_file._num_recs {
217 None
218 } else {
219 let rec = self._dbf_file.record(self._next_rec);
220 self._next_rec = self._next_rec + 1;
221 rec
222 }
223 }
224
225 fn size_hint(&self) -> (usize, Option<usize>) {
226 (self._dbf_file._num_recs as usize, Some(self._dbf_file._num_recs as usize))
227 }
228}