1use geonative_core::{Crs, FieldDef, GeomField, Schema, Value, ValueType};
36
37use crate::error::{Result, ShpError};
38
39#[derive(Debug, Clone)]
40pub struct DbfHeader {
41 pub n_records: u32,
42 pub header_len: u16,
43 pub record_len: u16,
44 pub fields: Vec<DbfField>,
45}
46
/// One field (column) descriptor taken from the DBF header.
#[derive(Debug, Clone)]
pub struct DbfField {
    /// Field name: the descriptor's first 11 bytes up to the first NUL,
    /// converted lossily from UTF-8.
    pub name: String,
    /// Raw one-byte type code. `decode_field` gives special treatment to
    /// `b'C'`, `b'N'`, `b'F'`, `b'D'` and `b'L'`; anything else is read as text.
    pub kind: u8,
    /// Width of the field inside a record, in bytes.
    pub length: u8,
    /// Declared decimal count. For `b'N'` fields, zero selects integer decoding.
    pub decimals: u8,
    /// Byte offset of this field from the start of a record. `parse_header`
    /// starts counting at 1, so record byte 0 is never part of any field.
    pub offset_in_record: usize,
}
57
58pub fn parse_header(bytes: &[u8]) -> Result<DbfHeader> {
59 if bytes.len() < 32 {
60 return Err(ShpError::malformed("dbf shorter than 32-byte header"));
61 }
62 let n_records = u32::from_le_bytes(bytes[4..8].try_into().unwrap());
63 let header_len = u16::from_le_bytes(bytes[8..10].try_into().unwrap());
64 let record_len = u16::from_le_bytes(bytes[10..12].try_into().unwrap());
65 if (header_len as usize) > bytes.len() {
66 return Err(ShpError::malformed(format!(
67 "dbf header_len {header_len} > file size {}",
68 bytes.len()
69 )));
70 }
71
72 let mut fields = Vec::new();
73 let mut pos = 32usize;
74 let mut field_offset = 1usize; while pos < header_len as usize && bytes.get(pos) != Some(&0x0D) {
76 if pos + 32 > bytes.len() {
77 return Err(ShpError::malformed("dbf field descriptor truncated"));
78 }
79 let name_bytes = &bytes[pos..pos + 11];
80 let name_end = name_bytes
81 .iter()
82 .position(|&b| b == 0)
83 .unwrap_or(name_bytes.len());
84 let name = String::from_utf8_lossy(&name_bytes[..name_end]).into_owned();
85 let kind = bytes[pos + 11];
86 let length = bytes[pos + 16];
87 let decimals = bytes[pos + 17];
88 fields.push(DbfField {
89 name,
90 kind,
91 length,
92 decimals,
93 offset_in_record: field_offset,
94 });
95 field_offset += length as usize;
96 pos += 32;
97 }
98
99 Ok(DbfHeader {
100 n_records,
101 header_len,
102 record_len,
103 fields,
104 })
105}
106
107pub fn field_to_def(f: &DbfField) -> FieldDef {
109 let ty = match f.kind {
110 b'C' => ValueType::String,
111 b'N' if f.decimals == 0 => ValueType::Int64,
112 b'N' | b'F' => ValueType::Float64,
113 b'D' => ValueType::DateTime,
114 b'L' => ValueType::Bool,
115 _ => ValueType::String, };
117 FieldDef::new(f.name.clone(), ty, true).with_width(f.length as u32)
118}
119
120pub fn build_schema(header: &DbfHeader, geom: GeomField, crs: Crs) -> Schema {
123 let fields = header.fields.iter().map(field_to_def).collect();
124 Schema::new(fields, Some(geom), crs)
125}
126
127pub fn decode_field(record_bytes: &[u8], field: &DbfField) -> Value {
130 let start = field.offset_in_record;
131 let end = start + field.length as usize;
132 if end > record_bytes.len() {
133 return Value::Null;
134 }
135 let raw = &record_bytes[start..end];
136 let trimmed = trim_ascii(raw);
137
138 match field.kind {
139 b'C' => {
140 if trimmed.is_empty() {
141 Value::Null
142 } else {
143 Value::String(String::from_utf8_lossy(trimmed).into_owned())
144 }
145 }
146 b'N' => {
147 let s = std::str::from_utf8(trimmed).unwrap_or("").trim();
148 if s.is_empty() {
149 return Value::Null;
150 }
151 if field.decimals == 0 {
152 s.parse::<i64>()
153 .ok()
154 .map(Value::Int64)
155 .unwrap_or(Value::Null)
156 } else {
157 s.parse::<f64>()
158 .ok()
159 .map(Value::Float64)
160 .unwrap_or(Value::Null)
161 }
162 }
163 b'F' => {
164 let s = std::str::from_utf8(trimmed).unwrap_or("").trim();
165 if s.is_empty() {
166 return Value::Null;
167 }
168 s.parse::<f64>()
169 .ok()
170 .map(Value::Float64)
171 .unwrap_or(Value::Null)
172 }
173 b'D' => {
174 if trimmed.len() != 8 {
176 return Value::Null;
177 }
178 let s = std::str::from_utf8(trimmed).unwrap_or("");
179 if s.chars().all(|c| c == ' ') || s.is_empty() {
180 return Value::Null;
181 }
182 let y: i32 = s[0..4].parse().unwrap_or(0);
183 let m: u32 = s[4..6].parse().unwrap_or(0);
184 let d: u32 = s[6..8].parse().unwrap_or(0);
185 if y == 0 && m == 0 && d == 0 {
186 return Value::Null;
187 }
188 Value::DateTime(ymd_to_gdb_days(y, m, d))
189 }
190 b'L' => match trimmed.first() {
191 Some(b'T') | Some(b't') | Some(b'Y') | Some(b'y') => Value::Bool(true),
192 Some(b'F') | Some(b'f') | Some(b'N') | Some(b'n') => Value::Bool(false),
193 _ => Value::Null,
194 },
195 _ => {
196 if trimmed.is_empty() {
198 Value::Null
199 } else {
200 Value::String(String::from_utf8_lossy(trimmed).into_owned())
201 }
202 }
203 }
204}
205
/// Returns `b` without its leading and trailing ASCII space bytes (`0x20`).
///
/// Only the space byte is stripped; tabs, NULs and other bytes are kept.
/// An all-space or empty input yields an empty slice.
fn trim_ascii(b: &[u8]) -> &[u8] {
    let not_space = |byte: &u8| *byte != b' ';
    // Index of the first kept byte, or the end of the slice when there is none.
    let head = b.iter().position(not_space).unwrap_or(b.len());
    // One past the last kept byte; collapses onto `head` when nothing is kept.
    let tail = b.iter().rposition(not_space).map_or(head, |last| last + 1);
    &b[head..tail]
}
218
/// Converts a Gregorian calendar date to a whole-day count, returned as `f64`.
///
/// The date is turned into a Julian Day Number and 2 415 019 is subtracted,
/// so 1899-12-30 maps to `0.0` and 1900-01-01 to `2.0`.
///
/// The inputs are not validated: a month of 0, 1 or 2 is treated as month
/// 12, 13 or 14 of the previous year, and out-of-range days simply shift
/// the result.
fn ymd_to_gdb_days(year: i32, month: u32, day: u32) -> f64 {
    /// Julian Day Number that corresponds to a result of zero.
    const EPOCH_JDN: i64 = 2_415_019;

    // January and February count as months 13 and 14 of the preceding year.
    let (shifted_year, shifted_month) = match month {
        0..=2 => (year - 1, month + 12),
        _ => (year, month),
    };
    let shifted_year = i64::from(shifted_year);

    let century = shifted_year / 100;
    let gregorian_correction = 2 - century + century / 4;
    let days_from_years = (365.25 * (shifted_year + 4716) as f64) as i64;
    let days_from_months = (30.6001 * (i64::from(shifted_month) + 1) as f64) as i64;

    let julian_day_number =
        days_from_years + days_from_months + i64::from(day) + gregorian_correction - 1524;
    (julian_day_number - EPOCH_JDN) as f64
}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Assembles a minimal in-memory DBF file.
    ///
    /// `fields` holds `(name, type code, length, decimals)` tuples. Every
    /// entry of `records` must be exactly `1 + sum of field lengths` bytes
    /// long: one leading flag byte followed by the fixed-width field values.
    fn make_dbf(fields: &[(&str, u8, u8, u8)], records: &[&[u8]]) -> Vec<u8> {
        let n_records = records.len() as u32;
        let header_len = (32 + fields.len() * 32 + 1) as u16;
        let record_len: u16 = 1 + fields.iter().map(|f| f.2 as u16).sum::<u16>();

        let mut buf = vec![0u8; 32];
        buf[0] = 0x03;
        buf[4..8].copy_from_slice(&n_records.to_le_bytes());
        buf[8..10].copy_from_slice(&header_len.to_le_bytes());
        buf[10..12].copy_from_slice(&record_len.to_le_bytes());

        for (name, kind, length, decimals) in fields {
            let mut desc = [0u8; 32];
            let name_bytes = name.as_bytes();
            desc[..name_bytes.len()].copy_from_slice(name_bytes);
            desc[11] = *kind;
            desc[16] = *length;
            desc[17] = *decimals;
            buf.extend_from_slice(&desc);
        }
        buf.push(0x0D);

        for r in records {
            assert_eq!(r.len(), record_len as usize, "test record length mismatch");
            buf.extend_from_slice(r);
        }
        buf.push(0x1A);
        buf
    }

    /// Returns the bytes of the first record, located via the parsed header.
    fn first_record<'a>(dbf: &'a [u8], header: &DbfHeader) -> &'a [u8] {
        let start = usize::from(header.header_len);
        &dbf[start..start + usize::from(header.record_len)]
    }

    #[test]
    fn parse_simple_header() {
        // Record: flag (1) + ID (10) + NAME (8) = 19 bytes.
        let dbf = make_dbf(
            &[("ID", b'N', 10, 0), ("NAME", b'C', 8, 0)],
            &[b" 0000000001Alice   "],
        );
        let h = parse_header(&dbf).unwrap();
        assert_eq!(h.n_records, 1);
        assert_eq!(h.record_len, 19);
        assert_eq!(h.fields.len(), 2);
        assert_eq!(h.fields[0].name, "ID");
        assert_eq!(h.fields[0].offset_in_record, 1);
        assert_eq!(h.fields[1].name, "NAME");
        assert_eq!(h.fields[1].offset_in_record, 11);
    }

    #[test]
    fn decode_integer_string_and_bool() {
        // Record: flag (1) + ID (5) + NAME (5) + OK (1) = 12 bytes.
        let dbf = make_dbf(
            &[("ID", b'N', 5, 0), ("NAME", b'C', 5, 0), ("OK", b'L', 1, 0)],
            &[b"  0042AliceT"],
        );
        let h = parse_header(&dbf).unwrap();
        let rec = first_record(&dbf, &h);
        assert_eq!(decode_field(rec, &h.fields[0]), Value::Int64(42));
        assert_eq!(
            decode_field(rec, &h.fields[1]),
            Value::String("Alice".into())
        );
        assert_eq!(decode_field(rec, &h.fields[2]), Value::Bool(true));
    }

    #[test]
    fn decode_float_with_decimals() {
        // Record: flag (1) + VAL (7) = 8 bytes.
        let dbf = make_dbf(&[("VAL", b'N', 7, 2)], &[b"   12.34"]);
        let h = parse_header(&dbf).unwrap();
        let rec = first_record(&dbf, &h);
        match decode_field(rec, &h.fields[0]) {
            Value::Float64(f) => assert!((f - 12.34).abs() < 1e-9),
            other => panic!("expected float, got {:?}", other),
        }
    }

    #[test]
    fn decode_date_field() {
        // Record: flag (1) + D (8) = 9 bytes.
        let dbf = make_dbf(&[("D", b'D', 8, 0)], &[b" 20240601"]);
        let h = parse_header(&dbf).unwrap();
        let rec = first_record(&dbf, &h);
        match decode_field(rec, &h.fields[0]) {
            Value::DateTime(d) => {
                assert!(d > 45000.0 && d < 46000.0, "got {d}");
                // 2024-06-01 is 45444 days after 1899-12-30.
                assert_eq!(d, 45444.0);
            }
            other => panic!("expected datetime, got {:?}", other),
        }
    }

    #[test]
    fn blank_numeric_is_null() {
        // Record: flag (1) + N (5) = 6 bytes, all spaces.
        let dbf = make_dbf(&[("N", b'N', 5, 0)], &[b"      "]);
        let h = parse_header(&dbf).unwrap();
        let rec = first_record(&dbf, &h);
        assert_eq!(decode_field(rec, &h.fields[0]), Value::Null);
    }

    #[test]
    fn field_extending_past_record_is_null() {
        let dbf = make_dbf(&[("N", b'N', 5, 0)], &[b" 00042"]);
        let h = parse_header(&dbf).unwrap();
        let rec = first_record(&dbf, &h);
        // Hand the decoder a record that is one byte too short for the field.
        assert_eq!(decode_field(&rec[..rec.len() - 1], &h.fields[0]), Value::Null);
    }
}