gistools/readers/shapefile/
dbf.rs

1use crate::{parsers::Reader, util::Date};
2use alloc::{string::String, vec, vec::Vec};
3use core::marker::PhantomData;
4use s2json::{MValue, MValueCompatible, PrimitiveValue, Properties, ValueType};
5
6/// The Header data explaining the contents of the DBF file
7#[derive(Debug, Clone, PartialEq)]
8pub struct DBFHeader {
9    /// The last updated date
10    #[allow(dead_code)]
11    pub last_updated: Date,
12    /// The number of records
13    pub records: u64,
14    /// The length of the header data
15    pub header_len: u64,
16    /// The length of each row
17    pub rec_len: u64,
18}
19
/// Descriptor of one column (field) of the table.
///
/// One descriptor is produced per 32-byte entry of the header's descriptor array; the list of
/// descriptors is then used to slice every record into named property values.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct DBFRow {
    /// Column name; used as the property key.
    name: String,
    /// Single-character type code of the column (for example `'N'`, `'D'`, `'L'` or `'C'`).
    data_type: char,
    /// Width of the column inside a record, in bytes.
    len: u64,
    /// Decimal count declared for the column; read from the file but not used when decoding.
    #[allow(dead_code)]
    decimal: u64,
}
33
34/// # Database File
35///
36/// ## Description
37/// A DBF data class to parse the data from a DBF
38///
39/// ## Usage
40/// ```rust
41/// use gistools::{parsers::FileReader, readers::DataBaseFile};
42/// use s2json::MValue;
43/// use std::path::PathBuf;
44///
45/// let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
46/// path.push("tests/readers/shapefile/fixtures/empty.dbf");
47///
48/// let reader = FileReader::new(path).unwrap();
49/// let dbf = DataBaseFile::new(reader, Some("utf-8".into()));
50///
51/// // Get the header data
52/// let header = dbf.get_header();
53///
54/// // grab individual properties
55/// let properties_0: MValue = dbf.get_properties(0).unwrap();
56/// let properties_1: MValue = dbf.get_properties(1).unwrap();
57/// ```
58#[derive(Debug, Clone)]
59pub struct DataBaseFile<T: Reader, M: MValueCompatible = MValue> {
60    /// The input reader
61    reader: T,
62    header: DBFHeader,
63    rows: Vec<DBFRow>,
64    #[allow(dead_code)]
65    /// The encoding of the raw data to string; defaults to 'utf-8'. Others not supported.
66    encoding: Option<String>,
67    _phantom: PhantomData<M>,
68}
69impl<T: Reader, M: MValueCompatible> DataBaseFile<T, M> {
70    /// Create a new DBF data class given an input reader
71    pub fn new(mut reader: T, encoding: Option<String>) -> DataBaseFile<T, M> {
72        let header = DataBaseFile::<T, M>::parse_header(&mut reader);
73        let rows = DataBaseFile::<T, M>::parse_row_header(&mut reader, &header);
74        DataBaseFile::<T, M> { reader, header, rows, encoding, _phantom: PhantomData }
75    }
76
77    /// Create a copy of the header data
78    pub fn get_header(&self) -> &DBFHeader {
79        &self.header
80    }
81
82    /// Get the properties for the given index
83    pub fn get_properties(&self, index: u64) -> Option<M> {
84        let DBFHeader { records, rec_len, .. } = self.header;
85        if index > records - 1 {
86            return None;
87        }
88        let offset = ((self.rows.len() as u64 + 1) << 5) + 2 + index * rec_len;
89
90        Some(self.parse_properties(offset))
91    }
92
93    /// Get all the properties in the DBF
94    pub fn get_all_properties(&self) -> Vec<M> {
95        let DBFHeader { records, .. } = self.header;
96        let mut res: Vec<M> = vec![];
97        for i in 0..records {
98            if let Some(properties) = self.get_properties(i) {
99                res.push(properties);
100            }
101        }
102
103        res
104    }
105
106    /// Parse the header and store it in the class
107    fn parse_header(reader: &mut T) -> DBFHeader {
108        DBFHeader {
109            last_updated: Date::new(
110                reader.uint8(Some(1)) as u16 + 1_900,
111                reader.uint8(Some(2)),
112                reader.uint8(Some(3)),
113            ),
114            records: reader.uint32_le(Some(4)) as u64,
115            header_len: reader.uint16_le(Some(8)) as u64,
116            rec_len: reader.uint16_le(Some(10)) as u64,
117        }
118    }
119
120    /// Parses the row header and builds an array of keys that each property may have
121    fn parse_row_header(reader: &mut T, header: &DBFHeader) -> Vec<DBFRow> {
122        let header_len = header.header_len;
123        let len = header_len - 1;
124        let mut res: Vec<DBFRow> = vec![];
125
126        let mut offset = 32;
127        while offset < len {
128            res.push(DBFRow {
129                name: reader.parse_string(Some(offset), Some(11)),
130                data_type: char::from(reader.uint8(Some(offset + 11))),
131                len: reader.uint8(Some(offset + 16)) as u64,
132                decimal: reader.uint8(Some(offset + 17)) as u64,
133            });
134            if reader.uint8(Some(offset + 32)) == 13 {
135                break;
136            } else {
137                offset += 32;
138            }
139        }
140
141        res
142    }
143
144    /// Parse the properties starting from the given offset
145    fn parse_properties(&self, mut offset: u64) -> M
146    where
147        M: MValueCompatible,
148    {
149        let mut properties: Properties = Properties::new();
150        for row in self.rows.clone().into_iter() {
151            let value = self.parse_value(offset, row.len, row.data_type);
152            offset += row.len;
153            properties.insert(row.name.clone(), ValueType::Primitive(value));
154        }
155
156        properties.into()
157    }
158
159    /// Parse the value at the given offset
160    fn parse_value(&self, offset: u64, len: u64, v_type: char) -> PrimitiveValue {
161        let text_data: String = self.reader.parse_string(Some(offset), Some(len)).trim().into();
162
163        match v_type {
164            'N' | 'F' | 'O' => text_data.parse::<f64>().unwrap_or(0.0).into(),
165            'D' => {
166                let year = text_data[0..4].parse::<u16>().unwrap_or(0) + 1900;
167                let month = text_data[4..6].parse::<u8>().unwrap_or(0);
168                let day = text_data[6..8].parse::<u8>().unwrap_or(0);
169                Date::new(year, month, day).get_time().into()
170            }
171            'L' => (text_data.to_lowercase() == "y" || text_data.to_lowercase() == "t").into(),
172            _ => {
173                // C is char
174                if text_data == "undefined" { PrimitiveValue::Null } else { text_data.into() }
175            }
176        }
177    }
178}