use std::{collections::HashMap, io::BufReader, fs::File};
use crate::{FieldInfo, FieldType, shapefile::Error, };
pub use uuid::Uuid;
/// A shapefile opened from disk: shape geometry plus its dBase attribute table.
pub struct Shapefile {
    /// Underlying reader. Public so generated code (see `schema!`) can drive
    /// its `dbase_reader` and `shape_reader` directly.
    pub reader: crate::shapefile::Reader<BufReader<File>>,
}

impl Shapefile {
    /// Returns the number of records declared in the dBase header.
    pub fn len(&self) -> usize {
        self.reader.dbase_reader.header.num_records as usize
    }

    /// Returns `true` when the dBase header declares zero records.
    ///
    /// Provided alongside [`Shapefile::len`] so callers do not have to spell
    /// out `len() == 0`.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Opens the shapefile located at `path`.
    ///
    /// # Errors
    /// Returns whatever error `Reader::from_path` reports.
    pub fn open(path: &str) -> Result<Self, Error> {
        let reader = crate::shapefile::Reader::from_path(path)?;
        Ok(Shapefile { reader })
    }

    /// Returns the field descriptors exposed by the dBase reader.
    pub fn fields(&self) -> &[FieldInfo] {
        self.reader.dbase_reader.fields()
    }
}
/// Type and byte layout of a single dBase field, as recorded by `FieldMap::new`.
#[derive(Debug)]
struct FieldMeta {
// Declared dBase type of the field.
kind: FieldType,
// Width of the field in bytes, as reported by `FieldInfo::length`.
length: u8,
// Sum of the lengths of all fields that precede this one.
offset: u32,
}
/// Name-indexed lookup of the type, width and byte offset of every field in a
/// dBase table.
#[derive(Debug)]
pub struct FieldMap<'a> {
    fields: HashMap<&'a str, FieldMeta>,
}

impl<'a> FieldMap<'a> {
    /// Builds the lookup from the table's field descriptors.
    ///
    /// Offsets are accumulated in declaration order starting at 0, so each
    /// field's offset is the total width of the fields before it. If two
    /// descriptors share a name, the later one replaces the earlier one.
    ///
    /// NOTE(review): offsets start at 0, which presumes the row bytes handed
    /// to the `*FieldOffset::of` readers begin at the first field (i.e. carry
    /// no leading per-record marker) — confirm against the row iterator.
    pub fn new(fields: &'a [FieldInfo]) -> Self {
        let mut by_name: HashMap<&'a str, FieldMeta> = HashMap::default();
        let mut running_offset = 0;
        for info in fields {
            let meta = FieldMeta {
                kind: info.field_type(),
                offset: running_offset,
                length: info.length(),
            };
            by_name.insert(&info.name(), meta);
            running_offset += info.length() as u32;
        }
        FieldMap { fields: by_name }
    }
}
pub use time::Date;
pub use time::OffsetDateTime as DateTime;
/// Maps a schema column kind (optionally prefixed by the `None` filter) to the
/// Rust type used for the matching field of the generated `Row<'a>`.
///
/// Implementation detail of `schema!`; not meant to be invoked directly.
#[macro_export]
macro_rules! __field_type {
    // Primitive kinds expand to themselves.
    (bool) => { bool };
    (i64) => { i64 };
    (usize) => { usize };
    (f64) => { f64 };

    // Borrowed text: relies on a lifetime named `'a` being in scope at the
    // expansion site (the generated `Row<'a>`).
    (str) => { &'a str };

    // Kinds backed by re-exported types.
    (Uuid) => { $crate::fields::Uuid };
    (None Uuid) => { Option<$crate::fields::Uuid> };
    (Date) => { $crate::fields::Date };
    (DateTime) => { $crate::fields::DateTime };
}
/// Maps a schema column kind (optionally prefixed by the `None` filter) to the
/// `*FieldOffset` reader type that decodes it from a raw row.
///
/// Implementation detail of `schema!`; not meant to be invoked directly.
#[macro_export]
macro_rules! __offset_type {
    // Flags and numbers.
    (bool) => { $crate::fields::BooleanFieldOffset };
    (i64) => { $crate::fields::IntFieldOffset };
    (usize) => { $crate::fields::UsizeFieldOffset };
    (f64) => { $crate::fields::FloatFieldOffset };

    // Text and text-derived values.
    (str) => { $crate::fields::StringFieldOffset };
    (Uuid) => { $crate::fields::UuidFieldOffset };
    (None Uuid) => { $crate::fields::OptionalUuidFieldOffset };

    // Dates.
    (Date) => { $crate::fields::DateFieldOffset };
    (DateTime) => { $crate::fields::DateTimeFieldOffset };
}
/// Records the decoded value of the column declared exactly as `id: Uuid`.
///
/// Invoked as `__update_id!(field: Kind, target, value)`. When `field` is the
/// identifier `id` and `Kind` is `Uuid`, expands to `target = Some(value);`.
/// Every other `field: Kind` pair expands to nothing.
///
/// Implementation detail of `schema!`; not meant to be invoked directly.
#[macro_export]
macro_rules! __update_id {
    // Must stay first: the catch-all arm below would also match `id: Uuid`.
    (id: Uuid, $target:ident, $value:ident) => {
        $target = Some($value);
    };
    ($field:ident : $kind:ident, $target:ident, $value:ident) => {};
}
/// Declares a typed view over a shapefile's dBase attribute table.
///
/// ```text
/// schema!(name {
///     field "KEY" [or DEFAULT] : Kind [else None] [from ARG],
///     ...
/// });
/// ```
///
/// Expands to a private module `name` containing:
///
/// * `FieldMapping` – one `*FieldOffset` reader per declared column;
/// * `Row<'a>` – one decoded record with a public field per declared column;
/// * `parser` – resolves every `KEY` against a `FieldMap` and builds the
///   `FieldMapping`;
/// * `read` – walks a `Shapefile`, handing each shape and decoded row to a
///   callback.
///
/// Per column:
///
/// * `KEY` is the string literal used to look the column up; it is also the
///   value reported when the column cannot be decoded.
/// * `or DEFAULT` substitutes the literal when the reader yields nothing.
/// * `Kind` selects the Rust type and the reader through `__field_type!` and
///   `__offset_type!`.
/// * `else None` is forwarded in front of `Kind` to those helper macros (which
///   define a `None Uuid` arm) and is also applied as `.or(None)`.
/// * `from ARG` is passed as an extra argument to the reader's `new`.
///
/// A column declared exactly as `id: Uuid` is additionally returned next to
/// the row so that log messages can identify the record by id.
#[macro_export]
macro_rules! schema {
    ($name: ident {$($field: ident $key: literal
        $(or $val: literal)?
        : $kind: ident $(else $filter: ident)?
        $(from $arg: expr)?
    ),* $(,)?}) => {
        mod $name {
            /// Resolved readers, one per column declared in the schema.
            pub struct FieldMapping {
                $($field: $crate::__offset_type!($($filter)? $kind)),*
            }

            /// One decoded attribute record.
            #[derive(Debug)]
            #[allow(dead_code)]
            pub struct Row<'a> {
                // Keeps `'a` used even when no declared column borrows from the row.
                __lifetime_hack: &'a ()
                $(,pub $field: $crate::__field_type!($($filter)? $kind))*
            }

            impl FieldMapping {
                /// Decodes one raw row.
                ///
                /// On success returns the row together with the value of the
                /// `id: Uuid` column, if the schema declares one. On failure
                /// returns the `KEY` of the first column that could not be
                /// decoded, together with the id if it was decoded before the
                /// failing column.
                pub(crate) fn of<'a>(&self, bytes: &'a [u8]) -> Result<
                    (Row<'a>, Option<$crate::fields::Uuid>),
                    (&'static str, Option<$crate::fields::Uuid>)
                > {
                    let mut id = None;
                    Ok((Row {
                        __lifetime_hack: &(),
                        $( $field: if let Some(val) = self.$field.of(bytes)$(.or(Some($val)))?$(.or($filter))? {
                            $crate::__update_id!($field: $kind, id, val);
                            val
                        } else {
                            return Err(($key, id))
                        }),*
                    }, id))
                }
            }

            /// Resolves every declared column against `map`.
            ///
            /// Fails with the first error reported by a reader's `new`.
            pub(crate) fn parser(map: &$crate::fields::FieldMap<'_>) -> $crate::fields::Ret<FieldMapping> {
                Ok(FieldMapping {
                    $( $field: <$crate::__offset_type!($($filter)? $kind)>::new(
                        map, $key $(, $arg)?
                    )?),*
                })
            }

            /// Reads every record of `shp`, calling `func(row_index, shape, row)`
            /// for each row that decodes successfully.
            ///
            /// Rows that fail to decode, and rows for which `func` returns an
            /// error, are counted and logged. Only the first five failures are
            /// logged in detail; the sixth triggers a single truncation notice
            /// and later ones are only counted. The total is logged at the end
            /// when it is non-zero.
            ///
            /// # Panics
            /// Panics if the schema cannot be resolved against the table's
            /// fields, if the shape iterator ends before the row iterator, or
            /// if reading a shape fails.
            pub fn read(shp: $crate::Shapefile, mut func: impl FnMut(usize, $crate::shapefile::Shape, Row<'_>) -> Result<
                (),
                Box<dyn std::error::Error>
            >) {
                let mut reader = shp.reader;
                let mapping = parser(&$crate::fields::FieldMap::new(
                    reader.dbase_reader.fields()
                )).unwrap();
                let mut rows = reader.dbase_reader.iter_record_rows();
                let mut shapes = reader.shape_reader.iter_shapes();
                let mut row_index = 0;
                let mut errors = 0;
                while let Some(bytes) = rows.next() {
                    let shape = shapes.next().unwrap().unwrap();
                    match mapping.of(bytes) {
                        Ok((row, id)) => {
                            if let Err(err) = func(row_index, shape, row) {
                                if errors < 5 {
                                    if let Some(id) = id {
                                        tiny_log::warn!("DBase Row Failed", schema=stringify!($name), id, err);
                                    } else {
                                        tiny_log::warn!("DBase Row Failed", schema=stringify!($name), row_index, err);
                                    }
                                } else if errors == 5 {
                                    // First suppressed failure: announce truncation exactly once.
                                    tiny_log::warn!("Many errors of have occoured, truncating");
                                }
                                errors += 1;
                            }
                        },
                        Err((field, id)) => {
                            if errors < 5 {
                                if let Some(id) = id {
                                    tiny_log::warn!("DBase Row Parsing Error",
                                        schema=stringify!($name),
                                        id,
                                        err="Invalid/Missing Field",
                                        field
                                    );
                                } else {
                                    tiny_log::warn!("DBase Row Parsing Error",
                                        schema=stringify!($name),
                                        row_index,
                                        err="Invalid/Missing Field",
                                        field
                                    );
                                }
                            } else if errors == 5 {
                                // First suppressed failure: announce truncation exactly once.
                                tiny_log::warn!("Many errors of have occoured, truncating");
                            }
                            errors += 1;
                        }
                    }
                    row_index += 1;
                }
                if errors > 0 {
                    tiny_log::error!("Failed ETL entries", schema=stringify!($name), errors);
                }
            }
        }
    };
}
/// Result alias returned by the `*FieldOffset::new` constructors; the error is
/// a boxed `std::error::Error`.
pub type Ret<T> = Result<T, Box<dyn std::error::Error>>;
/// Reader for a boolean flag stored in a single byte of a raw row.
pub struct BooleanFieldOffset {
    offset: u32,
}

impl BooleanFieldOffset {
    /// Looks `name` up in `map` and checks that the field can hold a flag.
    ///
    /// `Logical` fields are always accepted; `Character` and `Numeric` fields
    /// only when their length is exactly 1.
    ///
    /// # Errors
    /// `"{name}: Missing Field"` when the field is absent,
    /// `"{name}: Invalid Type"` when its type or length is not accepted.
    pub fn new(map: &FieldMap<'_>, name: &str) -> Ret<BooleanFieldOffset> {
        let meta = match map.fields.get(name) {
            Some(meta) => meta,
            None => return Err(format!("{name}: Missing Field").into()),
        };
        let accepted = match meta.kind {
            FieldType::Logical => true,
            FieldType::Character | FieldType::Numeric => meta.length == 1,
            _ => false,
        };
        if accepted {
            Ok(BooleanFieldOffset { offset: meta.offset })
        } else {
            Err(format!("{name}: Invalid Type").into())
        }
    }

    /// Decodes the flag from `row_bytes`.
    ///
    /// Returns `None` when the row is too short or the byte is not one of the
    /// recognised true/false markers.
    pub fn of(&self, row_bytes: &[u8]) -> Option<bool> {
        let marker = *row_bytes.get(self.offset as usize)?;
        match marker {
            b'1' | b'T' | b't' | b'Y' | b'y' => Some(true),
            b'0' | b'F' | b'f' | b'N' | b'n' => Some(false),
            _ => None,
        }
    }
}
/// Reader for an integer field whose value must fit in a `usize`.
///
/// Wraps an [`IntFieldOffset`] and narrows its `i64` result.
pub struct UsizeFieldOffset(IntFieldOffset);

impl UsizeFieldOffset {
    /// Resolves `name` exactly as `IntFieldOffset::new` does.
    pub fn new(map: &FieldMap<'_>, name: &str) -> Ret<UsizeFieldOffset> {
        let inner = IntFieldOffset::new(map, name)?;
        Ok(UsizeFieldOffset(inner))
    }

    /// Decodes the field; `None` when the inner reader fails or the value
    /// does not convert to `usize`.
    pub fn of<'a>(&self, row_bytes: &'a [u8]) -> Option<usize> {
        let UsizeFieldOffset(inner) = self;
        inner.of(row_bytes).and_then(|value| value.try_into().ok())
    }
}
/// Reader for a field holding a textual integer.
pub struct IntFieldOffset {
    offset: u32,
    length: u8,
}

impl IntFieldOffset {
    /// Looks `name` up in `map`; the field must be `Character` or `Numeric`.
    ///
    /// # Errors
    /// `"{name}: Missing Field"` or `"{name}: Invalid Type"`.
    pub fn new(map: &FieldMap<'_>, name: &str) -> Ret<IntFieldOffset> {
        let meta = match map.fields.get(name) {
            Some(meta) => meta,
            None => return Err(format!("{name}: Missing Field").into()),
        };
        let is_textual_number =
            meta.kind == FieldType::Character || meta.kind == FieldType::Numeric;
        if !is_textual_number {
            return Err(format!("{name}: Invalid Type").into());
        }
        Ok(IntFieldOffset {
            offset: meta.offset,
            length: meta.length,
        })
    }

    /// Byte range the field occupies within a raw row.
    pub fn byte_range(&self) -> std::ops::Range<usize> {
        let start = self.offset as usize;
        start..(start + usize::from(self.length))
    }

    /// Decodes the field as `i64`.
    ///
    /// Returns `None` when the row is too short, the trimmed bytes are not
    /// UTF-8, or the text does not parse as an integer.
    pub fn of<'a>(&self, row_bytes: &'a [u8]) -> Option<i64> {
        let raw = row_bytes.get(self.byte_range())?;
        let text = std::str::from_utf8(trim_field_data(raw)).ok()?;
        text.parse().ok()
    }
}
/// Reader for a field holding a textual floating-point number.
pub struct FloatFieldOffset {
    offset: u32,
    length: u8,
}

impl FloatFieldOffset {
    /// Looks `name` up in `map`; the field must be `Character` or `Numeric`.
    ///
    /// # Errors
    /// `"{name}: Missing Field"` or `"{name}: Invalid Type"`.
    pub fn new(map: &FieldMap<'_>, name: &str) -> Ret<FloatFieldOffset> {
        let meta = match map.fields.get(name) {
            Some(meta) => meta,
            None => return Err(format!("{name}: Missing Field").into()),
        };
        let is_textual_number =
            meta.kind == FieldType::Character || meta.kind == FieldType::Numeric;
        if !is_textual_number {
            return Err(format!("{name}: Invalid Type").into());
        }
        Ok(FloatFieldOffset {
            offset: meta.offset,
            length: meta.length,
        })
    }

    /// Byte range the field occupies within a raw row.
    pub fn byte_range(&self) -> std::ops::Range<usize> {
        let start = self.offset as usize;
        start..(start + usize::from(self.length))
    }

    /// Decodes the field as `f64`.
    ///
    /// Returns `None` when the row is too short or `fast_float::parse`
    /// rejects the trimmed bytes.
    pub fn of<'a>(&self, row_bytes: &'a [u8]) -> Option<f64> {
        let raw = row_bytes.get(self.byte_range())?;
        let trimmed = trim_field_data(raw);
        fast_float::parse(trimmed).ok()
    }
}
/// Reader for a text field that must contain a UUID.
pub struct UuidFieldOffset(StringFieldOffset);

impl UuidFieldOffset {
    /// Resolves `name` exactly as `StringFieldOffset::new` does.
    pub fn new(map: &FieldMap<'_>, name: &str) -> Ret<UuidFieldOffset> {
        let text_field = StringFieldOffset::new(map, name)?;
        Ok(UuidFieldOffset(text_field))
    }

    /// Byte range the underlying text field occupies within a raw row.
    pub fn byte_range(&self) -> std::ops::Range<usize> {
        let UuidFieldOffset(text_field) = self;
        text_field.byte_range()
    }

    /// Decodes the field; `None` when the text cannot be read or is not a
    /// UUID accepted by `Uuid::try_parse_ascii`.
    pub fn of(&self, row_bytes: &[u8]) -> Option<Uuid> {
        let text = self.0.of(row_bytes)?;
        Uuid::try_parse_ascii(text.as_bytes()).ok()
    }
}
/// Reader for a text field that may or may not contain a UUID.
pub struct OptionalUuidFieldOffset(StringFieldOffset);

impl OptionalUuidFieldOffset {
    /// Resolves `name` exactly as `StringFieldOffset::new` does.
    pub fn new(map: &FieldMap<'_>, name: &str) -> Ret<OptionalUuidFieldOffset> {
        let text_field = StringFieldOffset::new(map, name)?;
        Ok(OptionalUuidFieldOffset(text_field))
    }

    /// Byte range the underlying text field occupies within a raw row.
    pub fn byte_range(&self) -> std::ops::Range<usize> {
        let OptionalUuidFieldOffset(text_field) = self;
        text_field.byte_range()
    }

    /// Decodes the field.
    ///
    /// The outer `Option` is `None` only when the text itself cannot be read;
    /// text that is readable but not a valid UUID yields `Some(None)`.
    pub fn of(&self, row_bytes: &[u8]) -> Option<Option<Uuid>> {
        let text = self.0.of(row_bytes)?;
        let parsed = Uuid::try_parse_ascii(text.as_bytes()).ok();
        Some(parsed)
    }
}
/// Reader for a text field holding a date in a caller-supplied format.
///
/// Holds the text reader and the `time` format description used to parse it.
pub struct DateFieldOffset(
    StringFieldOffset,
    &'static [time::format_description::FormatItem<'static>],
);

impl DateFieldOffset {
    /// Resolves `name` as `StringFieldOffset::new` does and stores
    /// `formating` for later parsing.
    pub fn new(
        map: &FieldMap<'_>,
        name: &str,
        formating: &'static [time::format_description::FormatItem<'static>],
    ) -> Ret<DateFieldOffset> {
        let text_field = StringFieldOffset::new(map, name)?;
        Ok(DateFieldOffset(text_field, formating))
    }

    /// Decodes the field; `None` when the text cannot be read or does not
    /// parse with the stored format description.
    pub fn of(&self, row_bytes: &[u8]) -> Option<Date> {
        let text = self.0.of(row_bytes)?;
        Date::parse(text, self.1).ok()
    }
}
/// Reader for a `Character` field, yielding its trimmed text.
pub struct StringFieldOffset {
    offset: u32,
    length: u16,
}

impl StringFieldOffset {
    /// Looks `name` up in `map`; the field must be of type `Character`.
    ///
    /// # Errors
    /// `"{name}: Missing Field"` or `"{name}: Invalid Type"`.
    pub fn new(map: &FieldMap<'_>, name: &str) -> Ret<StringFieldOffset> {
        let meta = match map.fields.get(name) {
            Some(meta) => meta,
            None => return Err(format!("{name}: Missing Field").into()),
        };
        if meta.kind != FieldType::Character {
            return Err(format!("{name}: Invalid Type").into());
        }
        Ok(StringFieldOffset {
            offset: meta.offset,
            length: u16::from(meta.length),
        })
    }

    /// Byte range the field occupies within a raw row.
    pub fn byte_range(&self) -> std::ops::Range<usize> {
        let start = self.offset as usize;
        start..(start + usize::from(self.length))
    }

    /// Decodes the field as text borrowed from `row_bytes`.
    ///
    /// Returns `None` when the row is too short or the trimmed bytes are not
    /// valid UTF-8.
    pub fn of<'a>(&self, row_bytes: &'a [u8]) -> Option<&'a str> {
        let raw = row_bytes.get(self.byte_range())?;
        std::str::from_utf8(trim_field_data(raw)).ok()
    }
}
/// Reader for a `Date` field, yielding the date as a UTC timestamp at midnight.
pub struct DateTimeFieldOffset {
    offset: u32,
    length: u16,
}

impl DateTimeFieldOffset {
    /// Looks `name` up in `map`; the field must be of type `Date`.
    ///
    /// # Errors
    /// `"{name}: Missing Field"` or `"{name}: Invalid Type"`.
    pub fn new(map: &FieldMap<'_>, name: &str) -> Ret<DateTimeFieldOffset> {
        let field = map
            .fields
            .get(name)
            .ok_or_else(|| format!("{name}: Missing Field"))?;
        if field.kind != FieldType::Date {
            return Err(format!("{name}: Invalid Type").into());
        }
        Ok(DateTimeFieldOffset {
            offset: field.offset,
            length: field.length as u16,
        })
    }

    /// Byte range the field occupies within a raw row.
    pub fn byte_range(&self) -> std::ops::Range<usize> {
        self.offset as usize..self.offset as usize + self.length as usize
    }

    /// Decodes the field, reading the trimmed text as `YYYYMMDD`.
    ///
    /// Returns `None` when the row is too short, the bytes are not UTF-8, the
    /// text holds fewer than eight bytes (for example a blank date), any of
    /// the three components is not a number, or the components do not form a
    /// valid calendar date.
    pub fn of<'a>(&self, row_bytes: &'a [u8]) -> Option<DateTime> {
        let bytes = trim_field_data(row_bytes.get(self.byte_range())?);
        let s = std::str::from_utf8(bytes).ok()?;
        // Checked sub-slicing: direct indexing (`s[0..4]`) would panic on
        // blank or short values and on non-character-boundary offsets.
        let year = s.get(0..4)?.parse::<i32>().ok()?;
        let month = s.get(4..6)?.parse::<u8>().ok()?;
        let day = s.get(6..8)?.parse::<u8>().ok()?;
        let date = Date::from_calendar_date(year, month.try_into().ok()?, day).ok()?;
        Some(date.with_hms(0, 0, 0).ok()?.assume_utc())
    }
}
/// Strips padding from a raw field value.
///
/// Everything from the first NUL byte onwards is discarded, then leading and
/// trailing ASCII spaces are removed. Interior spaces are kept. Returns an
/// empty slice when nothing but spaces precedes the first NUL (or the end).
fn trim_field_data(bytes: &[u8]) -> &[u8] {
    // Data ends at the first NUL, or at the end of the slice if there is none.
    let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
    let data = &bytes[..end];

    let first = match data.iter().position(|&b| b != b' ') {
        Some(index) => index,
        None => return &[],
    };
    // A non-space byte exists, so the reverse search cannot fail; fall back to
    // `first` only to avoid an unwrap.
    let last = data.iter().rposition(|&b| b != b' ').unwrap_or(first);
    &data[first..=last]
}