pub mod record;
pub use record::{FieldDescriptor, FieldType, FieldValue};
use crate::error::{Result, ShapefileError};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::collections::HashMap;
use std::io::{Read, Seek, Write};
/// Size in bytes of the fixed (pre-descriptor) portion of a DBF header.
pub const DBF_HEADER_SIZE: usize = 32;
/// Size in bytes of each field-descriptor entry that follows the header.
pub const FIELD_DESCRIPTOR_SIZE: usize = 32;
/// Byte (0x0D, carriage return) that terminates the field-descriptor array.
pub const HEADER_TERMINATOR: u8 = 0x0D;
/// Record marker byte: '*' flags a record as logically deleted.
pub const RECORD_DELETED: u8 = 0x2A;
/// Record marker byte: ' ' (space) flags a record as active.
pub const RECORD_ACTIVE: u8 = 0x20;
/// Byte (0x1A, DOS EOF / Ctrl-Z) appended after the last record.
pub const FILE_TERMINATOR: u8 = 0x1A;
/// Parsed contents of the fixed 32-byte DBF file header.
#[derive(Debug, Clone)]
pub struct DbfHeader {
    /// File type / version byte (3 = dBASE III without memo file).
    pub version: u8,
    /// Last-update year byte (dBASE convention: years since 1900).
    pub year: u8,
    /// Last-update month (1-12).
    pub month: u8,
    /// Last-update day of month (1-31).
    pub day: u8,
    /// Number of records declared in the file.
    pub record_count: u32,
    /// Total header length in bytes, including descriptors and terminator.
    pub header_size: u16,
    /// Length in bytes of one record, including the deletion-marker byte.
    pub record_size: u16,
    /// Language driver / code page identifier byte.
    pub code_page: u8,
}
impl DbfHeader {
pub fn new(record_count: u32, field_descriptors: &[FieldDescriptor]) -> Result<Self> {
let record_size: usize = 1 + field_descriptors
.iter()
.map(|f| f.length as usize)
.sum::<usize>();
let header_size = DBF_HEADER_SIZE + (field_descriptors.len() * FIELD_DESCRIPTOR_SIZE) + 1;
let now = std::time::SystemTime::now();
let duration = now.duration_since(std::time::UNIX_EPOCH).map_err(|_| {
ShapefileError::InvalidDbfHeader {
message: "failed to get current time".to_string(),
}
})?;
let days_since_epoch = duration.as_secs() / 86400;
let year = ((days_since_epoch / 365) % 100) as u8; let month = 1; let day = 1;
Ok(Self {
version: 3, year,
month,
day,
record_count,
header_size: header_size as u16,
record_size: record_size as u16,
code_page: 0, })
}
pub fn read<R: Read>(reader: &mut R) -> Result<Self> {
let mut version = [0u8; 1];
reader
.read_exact(&mut version)
.map_err(|_| ShapefileError::unexpected_eof("reading dbf version"))?;
let mut date = [0u8; 3];
reader
.read_exact(&mut date)
.map_err(|_| ShapefileError::unexpected_eof("reading dbf date"))?;
let record_count = reader
.read_u32::<LittleEndian>()
.map_err(|_| ShapefileError::unexpected_eof("reading record count"))?;
let header_size = reader
.read_u16::<LittleEndian>()
.map_err(|_| ShapefileError::unexpected_eof("reading header size"))?;
let record_size = reader
.read_u16::<LittleEndian>()
.map_err(|_| ShapefileError::unexpected_eof("reading record size"))?;
let mut reserved = [0u8; 20];
reader
.read_exact(&mut reserved)
.map_err(|_| ShapefileError::unexpected_eof("reading dbf reserved bytes"))?;
let code_page = reserved[19];
Ok(Self {
version: version[0],
year: date[0],
month: date[1],
day: date[2],
record_count,
header_size,
record_size,
code_page,
})
}
pub fn write<W: Write>(&self, writer: &mut W) -> Result<()> {
writer
.write_all(&[self.version])
.map_err(ShapefileError::Io)?;
writer
.write_all(&[self.year, self.month, self.day])
.map_err(ShapefileError::Io)?;
writer
.write_u32::<LittleEndian>(self.record_count)
.map_err(ShapefileError::Io)?;
writer
.write_u16::<LittleEndian>(self.header_size)
.map_err(ShapefileError::Io)?;
writer
.write_u16::<LittleEndian>(self.record_size)
.map_err(ShapefileError::Io)?;
let mut reserved = [0u8; 20];
reserved[19] = self.code_page;
writer.write_all(&reserved).map_err(ShapefileError::Io)?;
Ok(())
}
}
/// A single DBF record: one value per field plus the deletion flag.
#[derive(Debug, Clone)]
pub struct DbfRecord {
    /// Field values in descriptor (file) order.
    pub values: Vec<FieldValue>,
    /// True if the record's marker byte flags it as deleted ('*').
    pub deleted: bool,
}
impl DbfRecord {
    /// Create an active (not deleted) record from `values`.
    pub fn new(values: Vec<FieldValue>) -> Self {
        Self {
            values,
            deleted: false,
        }
    }
    /// Read one record: a one-byte deletion marker ('*' = deleted) followed
    /// by each field's fixed-width payload, in descriptor order.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` if the stream ends mid-record, or propagates
    /// any error from `FieldValue::parse` for malformed field bytes.
    pub fn read<R: Read>(reader: &mut R, field_descriptors: &[FieldDescriptor]) -> Result<Self> {
        let mut marker = [0u8; 1];
        reader
            .read_exact(&mut marker)
            .map_err(|_| ShapefileError::unexpected_eof("reading record marker"))?;
        let deleted = marker[0] == RECORD_DELETED;
        let mut values = Vec::with_capacity(field_descriptors.len());
        for field in field_descriptors {
            let mut field_bytes = vec![0u8; field.length as usize];
            reader
                .read_exact(&mut field_bytes)
                .map_err(|_| ShapefileError::unexpected_eof("reading field value"))?;
            let value = FieldValue::parse(&field_bytes, field.field_type, field.decimal_count)?;
            values.push(value);
        }
        Ok(Self { values, deleted })
    }
    /// Write the record (marker byte + fixed-width field payloads).
    ///
    /// The value count is validated against `field_descriptors` BEFORE any
    /// byte is emitted, so a mismatch cannot leave a stray marker byte in
    /// the output stream.
    ///
    /// # Errors
    /// Returns `DbfError` on a value/descriptor count mismatch, or `Io` on
    /// a write failure.
    pub fn write<W: Write>(
        &self,
        writer: &mut W,
        field_descriptors: &[FieldDescriptor],
    ) -> Result<()> {
        // Validate first: emitting the marker before this check would
        // corrupt the output on error.
        if self.values.len() != field_descriptors.len() {
            return Err(ShapefileError::DbfError {
                message: format!(
                    "value count mismatch: expected {}, got {}",
                    field_descriptors.len(),
                    self.values.len()
                ),
                field: None,
                record: None,
            });
        }
        let marker = if self.deleted {
            RECORD_DELETED
        } else {
            RECORD_ACTIVE
        };
        writer.write_all(&[marker]).map_err(ShapefileError::Io)?;
        for (value, field) in self.values.iter().zip(field_descriptors) {
            let field_bytes = value.format(field.length as usize);
            writer.write_all(&field_bytes).map_err(ShapefileError::Io)?;
        }
        Ok(())
    }
    /// Pair each descriptor's name with the corresponding value.
    /// Duplicate field names collapse to the last occurrence.
    pub fn to_map(&self, field_descriptors: &[FieldDescriptor]) -> HashMap<String, FieldValue> {
        field_descriptors
            .iter()
            .zip(&self.values)
            .map(|(field, value)| (field.name.clone(), value.clone()))
            .collect()
    }
}
/// Streaming DBF reader: parses the header and field descriptors up front,
/// then yields records one at a time from the underlying reader.
pub struct DbfReader<R: Read> {
    // Positioned at the first record byte after construction.
    reader: R,
    // Header parsed during construction.
    header: DbfHeader,
    // Field descriptors in file order; drives record parsing.
    field_descriptors: Vec<FieldDescriptor>,
}
impl<R: Read> DbfReader<R> {
    /// Parse the header, all field descriptors, and the 0x0D terminator,
    /// leaving the reader positioned at the first record.
    ///
    /// # Errors
    /// Returns `InvalidDbfHeader` for a header size too small to hold the
    /// fixed header, or for a missing/incorrect terminator byte;
    /// `UnexpectedEof` on a truncated stream.
    pub fn new(mut reader: R) -> Result<Self> {
        let header = DbfHeader::read(&mut reader)?;
        // header_size spans the 32-byte fixed header, the descriptor array
        // and the terminator byte. checked_sub guards against malformed
        // values that would otherwise underflow and panic.
        let descriptor_bytes = (header.header_size as usize)
            .checked_sub(DBF_HEADER_SIZE + 1)
            .ok_or_else(|| ShapefileError::InvalidDbfHeader {
                message: format!("header size {} is too small", header.header_size),
            })?;
        let num_fields = descriptor_bytes / FIELD_DESCRIPTOR_SIZE;
        let mut field_descriptors = Vec::with_capacity(num_fields);
        for _ in 0..num_fields {
            let descriptor = FieldDescriptor::read(&mut reader)?;
            field_descriptors.push(descriptor);
        }
        let mut terminator = [0u8; 1];
        reader
            .read_exact(&mut terminator)
            .map_err(|_| ShapefileError::unexpected_eof("reading header terminator"))?;
        if terminator[0] != HEADER_TERMINATOR {
            return Err(ShapefileError::InvalidDbfHeader {
                message: format!(
                    "invalid header terminator: expected {}, got {}",
                    HEADER_TERMINATOR, terminator[0]
                ),
            });
        }
        Ok(Self {
            reader,
            header,
            field_descriptors,
        })
    }
    /// The parsed file header.
    pub fn header(&self) -> &DbfHeader {
        &self.header
    }
    /// The parsed field descriptors, in file order.
    pub fn field_descriptors(&self) -> &[FieldDescriptor] {
        &self.field_descriptors
    }
    /// Read the next record, or `Ok(None)` at end of input.
    ///
    /// EOF — whether surfaced as an I/O error or as the crate's own
    /// `UnexpectedEof` — is treated as normal end-of-data, not an error.
    pub fn read_record(&mut self) -> Result<Option<DbfRecord>> {
        match DbfRecord::read(&mut self.reader, &self.field_descriptors) {
            Ok(record) => Ok(Some(record)),
            Err(ShapefileError::Io(ref e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
                Ok(None)
            }
            Err(ShapefileError::UnexpectedEof { .. }) => Ok(None),
            Err(e) => Err(e),
        }
    }
    /// Read up to `record_count` (from the header) records, stopping early
    /// at end of input.
    ///
    /// The count is checked BEFORE each read, so the trailing 0x1A file
    /// terminator is never consumed as the start of a phantom record.
    pub fn read_all_records(&mut self) -> Result<Vec<DbfRecord>> {
        let expected = self.header.record_count as usize;
        // Cap the pre-allocation: record_count comes from the file and a
        // hostile value must not trigger a huge up-front allocation.
        let mut records = Vec::with_capacity(expected.min(65_536));
        while records.len() < expected {
            match self.read_record()? {
                Some(record) => records.push(record),
                None => break,
            }
        }
        Ok(records)
    }
}
/// Streaming DBF writer: emits header, field descriptors, records, and the
/// end-of-file marker, tracking how many records have been written.
pub struct DbfWriter<W: Write> {
    writer: W,
    // Header template; its record_count is refreshed from `record_count`
    // when the header is (re)written.
    header: DbfHeader,
    // Field layout every written record must match.
    field_descriptors: Vec<FieldDescriptor>,
    // Number of records written so far via write_record.
    record_count: u32,
}
impl<W: Write> DbfWriter<W> {
    /// Create a writer for records shaped by `field_descriptors`.
    ///
    /// The header starts with a record count of 0; call `write_header`
    /// before writing records and, with a seekable writer,
    /// `update_record_count` afterwards to patch in the real count.
    ///
    /// # Errors
    /// Propagates header-construction failures from `DbfHeader::new`.
    pub fn new(writer: W, field_descriptors: Vec<FieldDescriptor>) -> Result<Self> {
        let header = DbfHeader::new(0, &field_descriptors)?;
        Ok(Self {
            writer,
            header,
            field_descriptors,
            record_count: 0,
        })
    }
    /// Write the header, all field descriptors, and the 0x0D terminator.
    ///
    /// # Errors
    /// Returns `Io` if any write fails.
    pub fn write_header(&mut self) -> Result<()> {
        // Refresh the count so rewriting the header reflects records
        // written since construction.
        self.header.record_count = self.record_count;
        self.header.write(&mut self.writer)?;
        for field in &self.field_descriptors {
            field.write(&mut self.writer)?;
        }
        self.writer
            .write_all(&[HEADER_TERMINATOR])
            .map_err(ShapefileError::Io)?;
        Ok(())
    }
    /// Write one record and bump the internal record counter.
    ///
    /// # Errors
    /// Propagates validation and I/O errors from `DbfRecord::write`.
    pub fn write_record(&mut self, record: &DbfRecord) -> Result<()> {
        record.write(&mut self.writer, &self.field_descriptors)?;
        self.record_count += 1;
        Ok(())
    }
    /// Flush buffered bytes through to the underlying writer.
    pub fn flush(&mut self) -> Result<()> {
        self.writer.flush().map_err(ShapefileError::Io)
    }
    /// Write the 0x1A end-of-file marker, flush, and consume the writer.
    ///
    /// Flushing here matters: dropping a buffered writer without an
    /// explicit flush silently swallows any pending write error.
    pub fn finalize(mut self) -> Result<()> {
        self.writer
            .write_all(&[FILE_TERMINATOR])
            .map_err(ShapefileError::Io)?;
        self.writer.flush().map_err(ShapefileError::Io)
    }
}
impl<W: Write + Seek> DbfWriter<W> {
    /// Patch the on-disk record count after records have been written.
    ///
    /// Seeks to byte offset 4 (the header's record-count field), writes the
    /// current count little-endian, flushes, and restores the cursor to the
    /// end of the stream so further appends remain safe.
    ///
    /// # Errors
    /// Returns `Io` if any seek, write, or flush fails.
    pub fn update_record_count(&mut self) -> Result<()> {
        // WriteBytesExt is already in scope from the file-level import.
        self.header.record_count = self.record_count;
        self.writer
            .seek(std::io::SeekFrom::Start(4))
            .map_err(ShapefileError::Io)?;
        self.writer
            .write_u32::<LittleEndian>(self.record_count)
            .map_err(ShapefileError::Io)?;
        self.writer.flush().map_err(ShapefileError::Io)?;
        self.writer
            .seek(std::io::SeekFrom::End(0))
            .map_err(ShapefileError::Io)?;
        Ok(())
    }
}
impl DbfWriter<std::fs::File> {
    /// Force all written data for the underlying file down to disk
    /// (fsync-style durability, beyond a userspace flush).
    pub fn sync_all(&mut self) -> Result<()> {
        match self.writer.sync_all() {
            Ok(()) => Ok(()),
            Err(e) => Err(ShapefileError::Io(e)),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;
    /// Header write/read round-trip preserves version and record count,
    /// and the fixed header is exactly 32 bytes.
    #[test]
    fn test_dbf_header_round_trip() {
        let fields = vec![
            FieldDescriptor::new("NAME".to_string(), FieldType::Character, 50, 0)
                .expect("valid NAME field descriptor"),
            FieldDescriptor::new("VALUE".to_string(), FieldType::Number, 10, 2)
                .expect("valid VALUE field descriptor"),
        ];
        let header = DbfHeader::new(10, &fields).expect("valid dbf header");
        let mut buffer = Vec::new();
        header.write(&mut buffer).expect("write dbf header");
        // Descriptors are written separately; the fixed part is 32 bytes.
        assert_eq!(buffer.len(), DBF_HEADER_SIZE);
        let mut cursor = Cursor::new(buffer);
        let read_header = DbfHeader::read(&mut cursor).expect("read dbf header");
        assert_eq!(read_header.version, 3);
        assert_eq!(read_header.record_count, 10);
    }
    /// Record write/read round-trip preserves the deletion flag and the
    /// number of values.
    #[test]
    fn test_dbf_record_round_trip() {
        let fields = vec![
            FieldDescriptor::new("NAME".to_string(), FieldType::Character, 10, 0)
                .expect("valid NAME field descriptor"),
            FieldDescriptor::new("AGE".to_string(), FieldType::Number, 3, 0)
                .expect("valid AGE field descriptor"),
        ];
        let record = DbfRecord::new(vec![
            FieldValue::String("Alice".to_string()),
            FieldValue::Integer(30),
        ]);
        let mut buffer = Vec::new();
        record
            .write(&mut buffer, &fields)
            .expect("write dbf record");
        let mut cursor = Cursor::new(buffer);
        let read_record = DbfRecord::read(&mut cursor, &fields).expect("read dbf record");
        assert!(!read_record.deleted);
        assert_eq!(read_record.values.len(), 2);
    }
    /// Full-file round trip: hand-assembled header + descriptors + records
    /// + EOF marker must have the predicted byte length, and DbfReader must
    /// recover exactly the written records.
    #[test]
    fn test_dbf_reader_writer() {
        let fields = vec![
            FieldDescriptor::new("NAME".to_string(), FieldType::Character, 20, 0)
                .expect("valid field"),
            FieldDescriptor::new("VALUE".to_string(), FieldType::Number, 10, 2)
                .expect("valid field"),
        ];
        let mut buffer = Cursor::new(Vec::new());
        let records = vec![
            DbfRecord::new(vec![
                FieldValue::String("Test1".to_string()),
                FieldValue::Float(123.45),
            ]),
            DbfRecord::new(vec![
                FieldValue::String("Test2".to_string()),
                FieldValue::Float(678.90),
            ]),
        ];
        let header = DbfHeader::new(records.len() as u32, &fields).expect("valid header");
        header.write(&mut buffer).expect("write header");
        for field in &fields {
            field.write(&mut buffer).expect("write field");
        }
        buffer
            .write_all(&[HEADER_TERMINATOR])
            .expect("write terminator");
        for record in &records {
            record.write(&mut buffer, &fields).expect("write record");
        }
        buffer.write_all(&[FILE_TERMINATOR]).expect("write EOF");
        // Verify the byte layout: fixed header + 2 descriptors + 0x0D
        // terminator + 2 records (1 marker + 20 + 10 bytes each) + 0x1A.
        let expected_record_size = 1 + 20 + 10;
        let expected_size =
            DBF_HEADER_SIZE + (2 * FIELD_DESCRIPTOR_SIZE) + 1 + (2 * expected_record_size) + 1;
        assert_eq!(buffer.get_ref().len(), expected_size);
        buffer.set_position(0);
        let mut reader = DbfReader::new(buffer).expect("create reader");
        assert_eq!(reader.field_descriptors().len(), 2);
        let read_records = reader.read_all_records().expect("read records");
        assert_eq!(read_records.len(), 2);
    }
}