// oxigdal_shapefile/dbf/mod.rs

1//! DBF (.dbf) attribute file handling
2//!
3//! This module handles reading and writing dBase III/IV (.dbf) files,
4//! which contain the attribute data for Shapefile features.
5
6pub mod record;
7
8pub use record::{FieldDescriptor, FieldType, FieldValue};
9
10use crate::error::{Result, ShapefileError};
11use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
12use std::collections::HashMap;
13use std::io::{Read, Seek, Write};
14
/// Size in bytes of the fixed part of a DBF header
pub const DBF_HEADER_SIZE: usize = 32;

/// Size in bytes of a single DBF field descriptor
pub const FIELD_DESCRIPTOR_SIZE: usize = 32;

/// Byte that follows the last field descriptor in the header (`0x0D`)
pub const HEADER_TERMINATOR: u8 = b'\r';

/// Marker byte at the start of a deleted record (`0x2A`, `'*'`)
pub const RECORD_DELETED: u8 = b'*';

/// Marker byte at the start of an active record (`0x20`, `' '`)
pub const RECORD_ACTIVE: u8 = b' ';

/// Byte written after the last record to terminate the file
pub const FILE_TERMINATOR: u8 = 0x1A;
32
33/// DBF header
34#[derive(Debug, Clone)]
35pub struct DbfHeader {
36    /// Version (3 for dBase III, 4 for dBase IV)
37    pub version: u8,
38    /// Last update year (YY, e.g., 24 for 2024)
39    pub year: u8,
40    /// Last update month (1-12)
41    pub month: u8,
42    /// Last update day (1-31)
43    pub day: u8,
44    /// Number of records
45    pub record_count: u32,
46    /// Header size in bytes (including field descriptors)
47    pub header_size: u16,
48    /// Record size in bytes
49    pub record_size: u16,
50    /// Code page (for character encoding)
51    pub code_page: u8,
52}
53
54impl DbfHeader {
55    /// Creates a new DBF header
56    pub fn new(record_count: u32, field_descriptors: &[FieldDescriptor]) -> Result<Self> {
57        // Calculate record size (1 byte for deletion flag + sum of field lengths)
58        let record_size: usize = 1 + field_descriptors
59            .iter()
60            .map(|f| f.length as usize)
61            .sum::<usize>();
62
63        // Calculate header size (32 bytes header + field descriptors + terminator)
64        let header_size = DBF_HEADER_SIZE + (field_descriptors.len() * FIELD_DESCRIPTOR_SIZE) + 1;
65
66        // Get current date
67        let now = std::time::SystemTime::now();
68        let duration = now.duration_since(std::time::UNIX_EPOCH).map_err(|_| {
69            ShapefileError::InvalidDbfHeader {
70                message: "failed to get current time".to_string(),
71            }
72        })?;
73
74        // Simple date calculation (approximation)
75        let days_since_epoch = duration.as_secs() / 86400;
76        let year = ((days_since_epoch / 365) % 100) as u8; // Last 2 digits
77        let month = 1; // Default to January
78        let day = 1; // Default to 1st
79
80        Ok(Self {
81            version: 3, // dBase III
82            year,
83            month,
84            day,
85            record_count,
86            header_size: header_size as u16,
87            record_size: record_size as u16,
88            code_page: 0, // No specific code page
89        })
90    }
91
92    /// Reads a DBF header from a reader
93    pub fn read<R: Read>(reader: &mut R) -> Result<Self> {
94        // Read version (1 byte)
95        let mut version = [0u8; 1];
96        reader
97            .read_exact(&mut version)
98            .map_err(|_| ShapefileError::unexpected_eof("reading dbf version"))?;
99
100        // Read last update date (3 bytes: YY, MM, DD)
101        let mut date = [0u8; 3];
102        reader
103            .read_exact(&mut date)
104            .map_err(|_| ShapefileError::unexpected_eof("reading dbf date"))?;
105
106        // Read record count (4 bytes, little endian)
107        let record_count = reader
108            .read_u32::<LittleEndian>()
109            .map_err(|_| ShapefileError::unexpected_eof("reading record count"))?;
110
111        // Read header size (2 bytes, little endian)
112        let header_size = reader
113            .read_u16::<LittleEndian>()
114            .map_err(|_| ShapefileError::unexpected_eof("reading header size"))?;
115
116        // Read record size (2 bytes, little endian)
117        let record_size = reader
118            .read_u16::<LittleEndian>()
119            .map_err(|_| ShapefileError::unexpected_eof("reading record size"))?;
120
121        // Skip reserved bytes (20 bytes)
122        let mut reserved = [0u8; 20];
123        reader
124            .read_exact(&mut reserved)
125            .map_err(|_| ShapefileError::unexpected_eof("reading dbf reserved bytes"))?;
126
127        // Code page is at byte 29 (in the reserved area)
128        let code_page = reserved[19];
129
130        Ok(Self {
131            version: version[0],
132            year: date[0],
133            month: date[1],
134            day: date[2],
135            record_count,
136            header_size,
137            record_size,
138            code_page,
139        })
140    }
141
142    /// Writes a DBF header to a writer
143    pub fn write<W: Write>(&self, writer: &mut W) -> Result<()> {
144        // Write version (1 byte)
145        writer
146            .write_all(&[self.version])
147            .map_err(ShapefileError::Io)?;
148
149        // Write last update date (3 bytes)
150        writer
151            .write_all(&[self.year, self.month, self.day])
152            .map_err(ShapefileError::Io)?;
153
154        // Write record count (4 bytes, little endian)
155        writer
156            .write_u32::<LittleEndian>(self.record_count)
157            .map_err(ShapefileError::Io)?;
158
159        // Write header size (2 bytes, little endian)
160        writer
161            .write_u16::<LittleEndian>(self.header_size)
162            .map_err(ShapefileError::Io)?;
163
164        // Write record size (2 bytes, little endian)
165        writer
166            .write_u16::<LittleEndian>(self.record_size)
167            .map_err(ShapefileError::Io)?;
168
169        // Write reserved bytes (20 bytes, with code page at position 19)
170        let mut reserved = [0u8; 20];
171        reserved[19] = self.code_page;
172        writer.write_all(&reserved).map_err(ShapefileError::Io)?;
173
174        Ok(())
175    }
176}
177
178/// A DBF record (row of attribute data)
179#[derive(Debug, Clone)]
180pub struct DbfRecord {
181    /// Field values (in order)
182    pub values: Vec<FieldValue>,
183    /// Whether this record is deleted
184    pub deleted: bool,
185}
186
187impl DbfRecord {
188    /// Creates a new DBF record
189    pub fn new(values: Vec<FieldValue>) -> Self {
190        Self {
191            values,
192            deleted: false,
193        }
194    }
195
196    /// Reads a DBF record from a reader
197    pub fn read<R: Read>(reader: &mut R, field_descriptors: &[FieldDescriptor]) -> Result<Self> {
198        // Read deletion marker (1 byte)
199        let mut marker = [0u8; 1];
200        reader
201            .read_exact(&mut marker)
202            .map_err(|_| ShapefileError::unexpected_eof("reading record marker"))?;
203
204        let deleted = marker[0] == RECORD_DELETED;
205
206        // Read field values
207        let mut values = Vec::with_capacity(field_descriptors.len());
208        for field in field_descriptors {
209            let mut field_bytes = vec![0u8; field.length as usize];
210            reader
211                .read_exact(&mut field_bytes)
212                .map_err(|_| ShapefileError::unexpected_eof("reading field value"))?;
213
214            let value = FieldValue::parse(&field_bytes, field.field_type, field.decimal_count)?;
215            values.push(value);
216        }
217
218        Ok(Self { values, deleted })
219    }
220
221    /// Writes a DBF record to a writer
222    pub fn write<W: Write>(
223        &self,
224        writer: &mut W,
225        field_descriptors: &[FieldDescriptor],
226    ) -> Result<()> {
227        // Write deletion marker
228        let marker = if self.deleted {
229            RECORD_DELETED
230        } else {
231            RECORD_ACTIVE
232        };
233        writer.write_all(&[marker]).map_err(ShapefileError::Io)?;
234
235        // Write field values
236        if self.values.len() != field_descriptors.len() {
237            return Err(ShapefileError::DbfError {
238                message: format!(
239                    "value count mismatch: expected {}, got {}",
240                    field_descriptors.len(),
241                    self.values.len()
242                ),
243                field: None,
244                record: None,
245            });
246        }
247
248        for (value, field) in self.values.iter().zip(field_descriptors) {
249            let field_bytes = value.format(field.length as usize);
250            writer.write_all(&field_bytes).map_err(ShapefileError::Io)?;
251        }
252
253        Ok(())
254    }
255
256    /// Returns values as a HashMap (field name -> value)
257    pub fn to_map(&self, field_descriptors: &[FieldDescriptor]) -> HashMap<String, FieldValue> {
258        field_descriptors
259            .iter()
260            .zip(&self.values)
261            .map(|(field, value)| (field.name.clone(), value.clone()))
262            .collect()
263    }
264}
265
266/// DBF (.dbf) reader
267pub struct DbfReader<R: Read> {
268    reader: R,
269    header: DbfHeader,
270    field_descriptors: Vec<FieldDescriptor>,
271}
272
273impl<R: Read> DbfReader<R> {
274    /// Creates a new DBF reader
275    pub fn new(mut reader: R) -> Result<Self> {
276        // Read header
277        let header = DbfHeader::read(&mut reader)?;
278
279        // Calculate number of field descriptors
280        let num_fields =
281            (header.header_size as usize - DBF_HEADER_SIZE - 1) / FIELD_DESCRIPTOR_SIZE;
282
283        // Read field descriptors
284        let mut field_descriptors = Vec::with_capacity(num_fields);
285        for _ in 0..num_fields {
286            let descriptor = FieldDescriptor::read(&mut reader)?;
287            field_descriptors.push(descriptor);
288        }
289
290        // Read header terminator
291        let mut terminator = [0u8; 1];
292        reader
293            .read_exact(&mut terminator)
294            .map_err(|_| ShapefileError::unexpected_eof("reading header terminator"))?;
295
296        if terminator[0] != HEADER_TERMINATOR {
297            return Err(ShapefileError::InvalidDbfHeader {
298                message: format!(
299                    "invalid header terminator: expected {}, got {}",
300                    HEADER_TERMINATOR, terminator[0]
301                ),
302            });
303        }
304
305        Ok(Self {
306            reader,
307            header,
308            field_descriptors,
309        })
310    }
311
312    /// Returns the header
313    pub fn header(&self) -> &DbfHeader {
314        &self.header
315    }
316
317    /// Returns the field descriptors
318    pub fn field_descriptors(&self) -> &[FieldDescriptor] {
319        &self.field_descriptors
320    }
321
322    /// Reads the next record
323    pub fn read_record(&mut self) -> Result<Option<DbfRecord>> {
324        match DbfRecord::read(&mut self.reader, &self.field_descriptors) {
325            Ok(record) => Ok(Some(record)),
326            Err(ShapefileError::Io(ref e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
327                Ok(None)
328            }
329            Err(ShapefileError::UnexpectedEof { .. }) => {
330                // EOF when reading record is expected at end of file
331                Ok(None)
332            }
333            Err(e) => Err(e),
334        }
335    }
336
337    /// Reads all records
338    pub fn read_all_records(&mut self) -> Result<Vec<DbfRecord>> {
339        let mut records = Vec::with_capacity(self.header.record_count as usize);
340        while let Some(record) = self.read_record()? {
341            // Check for file terminator
342            if records.len() >= self.header.record_count as usize {
343                break;
344            }
345            records.push(record);
346        }
347        Ok(records)
348    }
349}
350
351/// DBF (.dbf) writer
352pub struct DbfWriter<W: Write> {
353    writer: W,
354    header: DbfHeader,
355    field_descriptors: Vec<FieldDescriptor>,
356    record_count: u32,
357}
358
359impl<W: Write> DbfWriter<W> {
360    /// Creates a new DBF writer
361    pub fn new(writer: W, field_descriptors: Vec<FieldDescriptor>) -> Result<Self> {
362        let header = DbfHeader::new(0, &field_descriptors)?;
363        Ok(Self {
364            writer,
365            header,
366            field_descriptors,
367            record_count: 0,
368        })
369    }
370
371    /// Writes the header (should be called first)
372    pub fn write_header(&mut self) -> Result<()> {
373        // Update header with current record count
374        self.header.record_count = self.record_count;
375        self.header.write(&mut self.writer)?;
376
377        // Write field descriptors
378        for field in &self.field_descriptors {
379            field.write(&mut self.writer)?;
380        }
381
382        // Write header terminator
383        self.writer
384            .write_all(&[HEADER_TERMINATOR])
385            .map_err(ShapefileError::Io)?;
386
387        Ok(())
388    }
389
390    /// Writes a record
391    pub fn write_record(&mut self, record: &DbfRecord) -> Result<()> {
392        record.write(&mut self.writer, &self.field_descriptors)?;
393        self.record_count += 1;
394        Ok(())
395    }
396
397    /// Flushes the internal writer
398    pub fn flush(&mut self) -> Result<()> {
399        self.writer.flush().map_err(ShapefileError::Io)
400    }
401
402    /// Finalizes the file (updates header with record count and writes terminator)
403    pub fn finalize(mut self) -> Result<()> {
404        // Write file terminator
405        self.writer
406            .write_all(&[FILE_TERMINATOR])
407            .map_err(ShapefileError::Io)?;
408
409        Ok(())
410    }
411}
412
413impl<W: Write + Seek> DbfWriter<W> {
414    /// Updates the record count in the header (for seekable writers)
415    pub fn update_record_count(&mut self) -> Result<()> {
416        use byteorder::WriteBytesExt;
417
418        // Update header record count
419        self.header.record_count = self.record_count;
420
421        // Seek to record count position in header (byte 4)
422        self.writer
423            .seek(std::io::SeekFrom::Start(4))
424            .map_err(ShapefileError::Io)?;
425
426        // Write record count (little endian)
427        self.writer
428            .write_u32::<LittleEndian>(self.record_count)
429            .map_err(ShapefileError::Io)?;
430
431        // Flush to ensure the update is written
432        self.writer.flush().map_err(ShapefileError::Io)?;
433
434        // Seek back to end of file
435        self.writer
436            .seek(std::io::SeekFrom::End(0))
437            .map_err(ShapefileError::Io)?;
438
439        Ok(())
440    }
441}
442
443impl DbfWriter<std::fs::File> {
444    /// Syncs all data to disk (only available for File writers)
445    pub fn sync_all(&mut self) -> Result<()> {
446        self.writer.sync_all().map_err(ShapefileError::Io)
447    }
448}
449
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// A header written and read back must preserve every stored field.
    #[test]
    fn test_dbf_header_round_trip() {
        let fields = vec![
            FieldDescriptor::new("NAME".to_string(), FieldType::Character, 50, 0)
                .expect("valid NAME field descriptor"),
            FieldDescriptor::new("VALUE".to_string(), FieldType::Number, 10, 2)
                .expect("valid VALUE field descriptor"),
        ];

        let header = DbfHeader::new(10, &fields).expect("valid dbf header");

        let mut buffer = Vec::new();
        header.write(&mut buffer).expect("write dbf header");

        assert_eq!(buffer.len(), DBF_HEADER_SIZE);

        let mut cursor = Cursor::new(buffer);
        let read_header = DbfHeader::read(&mut cursor).expect("read dbf header");

        assert_eq!(read_header.version, 3);
        assert_eq!(read_header.record_count, 10);

        // The remaining fields must survive the round trip unchanged as well.
        assert_eq!(read_header.year, header.year);
        assert_eq!(read_header.month, header.month);
        assert_eq!(read_header.day, header.day);
        assert_eq!(read_header.header_size, header.header_size);
        assert_eq!(read_header.record_size, header.record_size);
        assert_eq!(read_header.code_page, header.code_page);
    }

    /// A record written and read back keeps its value count and active flag.
    #[test]
    fn test_dbf_record_round_trip() {
        let fields = vec![
            FieldDescriptor::new("NAME".to_string(), FieldType::Character, 10, 0)
                .expect("valid NAME field descriptor"),
            FieldDescriptor::new("AGE".to_string(), FieldType::Number, 3, 0)
                .expect("valid AGE field descriptor"),
        ];

        let record = DbfRecord::new(vec![
            FieldValue::String("Alice".to_string()),
            FieldValue::Integer(30),
        ]);

        let mut buffer = Vec::new();
        record
            .write(&mut buffer, &fields)
            .expect("write dbf record");

        let mut cursor = Cursor::new(buffer);
        let read_record = DbfRecord::read(&mut cursor, &fields).expect("read dbf record");

        assert!(!read_record.deleted);
        assert_eq!(read_record.values.len(), 2);
    }

    /// A complete file assembled by hand is readable through `DbfReader`.
    #[test]
    fn test_dbf_reader_writer() {
        let fields = vec![
            FieldDescriptor::new("NAME".to_string(), FieldType::Character, 20, 0)
                .expect("valid field"),
            FieldDescriptor::new("VALUE".to_string(), FieldType::Number, 10, 2)
                .expect("valid field"),
        ];

        let records = vec![
            DbfRecord::new(vec![
                FieldValue::String("Test1".to_string()),
                FieldValue::Float(123.45),
            ]),
            DbfRecord::new(vec![
                FieldValue::String("Test2".to_string()),
                FieldValue::Float(678.90),
            ]),
        ];

        let mut buffer = Cursor::new(Vec::new());

        // Header with the record count known up front
        let header = DbfHeader::new(records.len() as u32, &fields).expect("valid header");
        header.write(&mut buffer).expect("write header");

        // Field descriptors followed by the header terminator
        for field in &fields {
            field.write(&mut buffer).expect("write field");
        }
        buffer
            .write_all(&[HEADER_TERMINATOR])
            .expect("write terminator");

        // Records followed by the file terminator
        for record in &records {
            record.write(&mut buffer, &fields).expect("write record");
        }
        buffer.write_all(&[FILE_TERMINATOR]).expect("write EOF");

        // Read everything back from the start
        buffer.set_position(0);
        let mut reader = DbfReader::new(buffer).expect("create reader");

        assert_eq!(reader.field_descriptors().len(), 2);
        assert_eq!(reader.header().record_count, 2);

        let read_records = reader.read_all_records().expect("read records");
        assert_eq!(read_records.len(), 2);
    }
}