use std::collections::HashMap;
use serde::Serialize;
use marrow::datatypes::{DataType, Field};
use crate::internal::{error::Result, schema::transmute_field};
/// Identifies how a tracing run obtains its information.
///
/// The default [`TracingOptions`] start out with [`TracingMode::Unknown`];
/// crate-internal code selects another mode via `TracingOptions::tracing_mode`.
///
/// The enum is fieldless, so equality is total and `Eq` is derived alongside
/// `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TracingMode {
    /// No mode has been selected yet.
    Unknown,
    /// NOTE(review): presumably tracing driven by type information only —
    /// confirm against the tracer implementation.
    FromType,
    /// NOTE(review): presumably tracing driven by sample values — confirm
    /// against the tracer implementation.
    FromSamples,
}
/// Options that configure tracing.
///
/// Values are normally built by starting from `TracingOptions::default()` (or
/// `TracingOptions::new()`) and chaining the by-value setter methods. Because
/// the struct is `#[non_exhaustive]`, code outside this crate cannot construct
/// it with a struct literal.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct TracingOptions {
/// Defaults to `false`.
/// NOTE(review): presumably permits fields for which only null values were
/// seen — confirm against the tracer.
pub allow_null_fields: bool,
/// Defaults to `true`.
/// NOTE(review): presumably selects a struct representation for maps —
/// confirm against the tracer.
pub map_as_struct: bool,
/// Defaults to `true`.
/// NOTE(review): presumably selects the large list type for sequences —
/// confirm against the tracer.
pub sequence_as_large_list: bool,
/// When `true` (the default), `string_type()` yields `DataType::LargeUtf8`;
/// otherwise `DataType::Utf8`. Note that the setter is spelled
/// `strings_as_large_utf8` (plural), unlike this field.
pub string_as_large_utf8: bool,
/// When `true` (the default), `binary_type()` yields
/// `DataType::LargeBinary`; otherwise `DataType::Binary`.
pub bytes_as_large_binary: bool,
/// Defaults to `false`.
/// NOTE(review): presumably enables dictionary encoding for strings —
/// confirm against the tracer.
pub string_dictionary_encoding: bool,
/// Defaults to `false`.
/// NOTE(review): presumably allows differing numeric types to be unified —
/// confirm against the tracer.
pub coerce_numbers: bool,
/// Defaults to `false`.
/// NOTE(review): presumably allows falling back to a string representation
/// — confirm against the tracer.
pub allow_to_string: bool,
/// Defaults to `false`.
/// NOTE(review): presumably enables detecting date-like strings — confirm
/// against the tracer.
pub guess_dates: bool,
/// Defaults to `100`.
/// NOTE(review): presumably a work limit used when tracing from types —
/// confirm the unit against the tracer.
pub from_type_budget: usize,
/// Defaults to `false`.
/// NOTE(review): presumably represents data-less enums as strings —
/// confirm against the tracer.
pub enums_without_data_as_strings: bool,
/// Explicit field definitions registered through `overwrite`, which stores
/// each entry under the key `$.{path}`. Empty by default.
pub overwrites: Overwrites,
/// Crate-private tracing mode; defaults to `TracingMode::Unknown` and is
/// changed with the crate-private `tracing_mode` setter.
pub(crate) tracing_mode: TracingMode,
}
impl Default for TracingOptions {
    /// Returns the baseline configuration.
    ///
    /// The `map_as_struct` flag and the three "large" type flags are on, every
    /// other flag is off, `from_type_budget` is `100`, no overwrites are
    /// registered and the tracing mode is [`TracingMode::Unknown`].
    fn default() -> Self {
        Self {
            // Flags that are enabled out of the box.
            map_as_struct: true,
            sequence_as_large_list: true,
            string_as_large_utf8: true,
            bytes_as_large_binary: true,

            // Flags that callers must opt into.
            allow_null_fields: false,
            string_dictionary_encoding: false,
            coerce_numbers: false,
            allow_to_string: false,
            guess_dates: false,
            enums_without_data_as_strings: false,

            // Non-boolean settings.
            from_type_budget: 100,
            overwrites: Default::default(),
            tracing_mode: TracingMode::Unknown,
        }
    }
}
impl TracingOptions {
pub fn new() -> Self {
Default::default()
}
pub fn allow_null_fields(mut self, value: bool) -> Self {
self.allow_null_fields = value;
self
}
pub fn map_as_struct(mut self, value: bool) -> Self {
self.map_as_struct = value;
self
}
pub fn sequence_as_large_list(mut self, value: bool) -> Self {
self.sequence_as_large_list = value;
self
}
pub fn strings_as_large_utf8(mut self, value: bool) -> Self {
self.string_as_large_utf8 = value;
self
}
pub fn bytes_as_large_binary(mut self, value: bool) -> Self {
self.bytes_as_large_binary = value;
self
}
pub fn string_dictionary_encoding(mut self, value: bool) -> Self {
self.string_dictionary_encoding = value;
self
}
pub fn coerce_numbers(mut self, value: bool) -> Self {
self.coerce_numbers = value;
self
}
pub fn allow_to_string(mut self, value: bool) -> Self {
self.allow_to_string = value;
self
}
pub fn guess_dates(mut self, value: bool) -> Self {
self.guess_dates = value;
self
}
pub fn from_type_budget(mut self, value: usize) -> Self {
self.from_type_budget = value;
self
}
pub fn enums_without_data_as_strings(mut self, value: bool) -> Self {
self.enums_without_data_as_strings = value;
self
}
pub fn overwrite<P: Into<String>, F: Serialize>(mut self, path: P, field: F) -> Result<Self> {
self.overwrites.0.insert(
format!("$.{path}", path = path.into()),
transmute_field(field)?,
);
Ok(self)
}
pub(crate) fn tracing_mode(mut self, value: TracingMode) -> Self {
self.tracing_mode = value;
self
}
pub(crate) fn get_overwrite(&self, path: &str) -> Option<&Field> {
self.overwrites.0.get(path)
}
pub(crate) fn string_type(&self) -> DataType {
if self.string_as_large_utf8 {
DataType::LargeUtf8
} else {
DataType::Utf8
}
}
pub(crate) fn binary_type(&self) -> DataType {
if self.bytes_as_large_binary {
DataType::LargeBinary
} else {
DataType::Binary
}
}
}
/// Explicit field definitions keyed by path string.
///
/// `TracingOptions::overwrite` inserts entries under the key `$.{path}`, and
/// `TracingOptions::get_overwrite` retrieves them by the exact key string.
/// The inner map is only accessible within this crate.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Overwrites(pub(crate) HashMap<String, Field>);