use std::collections::HashMap;
use std::default::Default;
use std::fmt;
use serde_derive::{Deserialize, Serialize};
use serde_json::{json, Value};
use crate::error::{ArrowError, Result};
use super::Field;
/// Describes a record layout: an ordered list of [`Field`]s plus optional
/// string key-value metadata.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct Schema {
    /// The fields of the schema, in column order.
    pub(crate) fields: Vec<Field>,
    /// Free-form key-value metadata attached to the schema.
    ///
    /// The entry is omitted from the serialized form when the map is empty, so
    /// it must also default to an empty map when deserializing. Without
    /// `default`, a schema serialized with no metadata could not be read back
    /// because the `metadata` key would be reported as missing.
    #[serde(skip_serializing_if = "HashMap::is_empty", default)]
    pub(crate) metadata: HashMap<String, String>,
}
impl Schema {
pub fn empty() -> Self {
Self {
fields: vec![],
metadata: HashMap::new(),
}
}
pub fn new(fields: Vec<Field>) -> Self {
Self::new_with_metadata(fields, HashMap::new())
}
/// Creates a schema from the given fields and key-value metadata.
///
/// Both arguments are moved into the new schema unchanged.
#[inline]
pub const fn new_with_metadata(
fields: Vec<Field>,
metadata: HashMap<String, String>,
) -> Self {
Self { fields, metadata }
}
/// Merges a sequence of schemas into a single schema.
///
/// Metadata maps are unioned. Fields are matched by name: a field whose name
/// is already present is merged into every existing field of that name via
/// `Field::try_merge`, while a field with a new name is appended.
///
/// # Errors
///
/// Returns `ArrowError::SchemaError` if two schemas hold different values
/// for the same metadata key, and propagates any error raised while merging
/// two same-named fields.
pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self> {
    let mut merged = Self::empty();

    for schema in schemas {
        let Schema { metadata, fields } = schema;

        // Metadata: identical duplicates are fine, differing values are not.
        for (key, value) in metadata {
            let conflicting = merged
                .metadata
                .get(&key)
                .map_or(false, |existing| existing != &value);
            if conflicting {
                return Err(ArrowError::SchemaError(
                    "Fail to merge schema due to conflicting metadata.".to_string(),
                ));
            }
            merged.metadata.insert(key, value);
        }

        // Fields: merge into every already-known field with the same name,
        // or append when the name has not been seen yet.
        for incoming in fields {
            let mut seen = false;
            let same_name = merged
                .fields
                .iter_mut()
                .filter(|candidate| candidate.name() == incoming.name());
            for existing in same_name {
                seen = true;
                existing.try_merge(&incoming)?;
            }
            if !seen {
                merged.fields.push(incoming);
            }
        }
    }

    Ok(merged)
}
/// Returns a reference to the schema's list of fields.
#[inline]
pub const fn fields(&self) -> &Vec<Field> {
&self.fields
}
/// Returns a reference to the field at position `i`.
///
/// # Panics
///
/// Panics if `i` is not a valid index into the field list.
pub fn field(&self, i: usize) -> &Field {
&self.fields[i]
}
/// Returns a reference to the first field called `name`.
///
/// # Errors
///
/// Propagates the error from [`Schema::index_of`] when no field has that name.
pub fn field_with_name(&self, name: &str) -> Result<&Field> {
    let position = self.index_of(name)?;
    Ok(&self.fields[position])
}
/// Returns every field whose `dict_id()` is `Some(dict_id)`, in schema order.
pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
    let has_requested_id =
        |field: &&Field| field.dict_id().map_or(false, |id| id == dict_id);
    self.fields.iter().filter(has_requested_id).collect()
}
/// Returns the position of the first field called `name`.
///
/// # Errors
///
/// Returns `ArrowError::InvalidArgumentError` listing the names of all
/// fields in the schema when none of them matches `name`.
pub fn index_of(&self, name: &str) -> Result<usize> {
    self.fields
        .iter()
        .position(|candidate| candidate.name() == name)
        .ok_or_else(|| {
            // Only gather the valid names once the lookup has failed.
            let valid_fields: Vec<String> = self
                .fields
                .iter()
                .map(|candidate| candidate.name().clone())
                .collect();
            ArrowError::InvalidArgumentError(format!(
                "Unable to get field named \"{}\". Valid fields: {:?}",
                name, valid_fields
            ))
        })
}
/// Returns a reference to the schema's key-value metadata map.
#[inline]
pub const fn metadata(&self) -> &HashMap<String, String> {
&self.metadata
}
/// Looks up the first field called `name`.
///
/// Returns the field's position together with a reference to it, or `None`
/// when no field has that name.
pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
    let position = self
        .fields
        .iter()
        .position(|column| column.name() == name)?;
    Some((position, &self.fields[position]))
}
pub fn to_json(&self) -> Value {
json!({
"fields": self.fields.iter().map(|field| field.to_json()).collect::<Vec<Value>>(),
"metadata": serde_json::to_value(&self.metadata).unwrap()
})
}
/// Parses a schema from a JSON value.
///
/// The value must be an object with a `fields` array, each element of which
/// is parsed with `Field::from`. A `metadata` member is optional; when it is
/// absent the schema gets an empty metadata map.
///
/// # Errors
///
/// Returns `ArrowError::ParseError` when `json` is not an object or when
/// `fields` is missing or not an array, and propagates errors from parsing
/// the individual fields or the metadata.
pub fn from(json: &Value) -> Result<Self> {
    let schema = match json {
        Value::Object(members) => members,
        _ => {
            return Err(ArrowError::ParseError(
                "Invalid json value type for schema".to_string(),
            ))
        }
    };

    let fields = match schema.get("fields") {
        Some(Value::Array(items)) => {
            items.iter().map(Field::from).collect::<Result<_>>()?
        }
        _ => {
            return Err(ArrowError::ParseError(
                "Schema fields should be an array".to_string(),
            ))
        }
    };

    let metadata = match schema.get("metadata") {
        Some(value) => Self::from_metadata(value)?,
        None => HashMap::default(),
    };

    Ok(Self { fields, metadata })
}
/// Parses schema metadata from its JSON representation.
///
/// Two encodings are accepted:
/// * an array of objects, each deserialized as a `MetadataKeyValue`
///   (string `key` and `value` members);
/// * an object whose members are all strings.
///
/// If the array form repeats a key, the last occurrence wins.
///
/// # Errors
///
/// Returns `ArrowError::JsonError` when the array form cannot be
/// deserialized, and `ArrowError::ParseError` when an object member is not a
/// string or when `json` is neither an array nor an object.
fn from_metadata(json: &Value) -> Result<HashMap<String, String>> {
    match json {
        Value::Array(_) => {
            let entries: Vec<MetadataKeyValue> = serde_json::from_value(json.clone())
                .map_err(|_| {
                    ArrowError::JsonError(
                        "Unable to parse object into key-value pair".to_string(),
                    )
                })?;
            // The entries are owned, so key and value can be moved into the
            // map without cloning.
            Ok(entries
                .into_iter()
                .map(|entry| (entry.key, entry.value))
                .collect())
        }
        Value::Object(md) => md
            .iter()
            .map(|(k, v)| {
                if let Value::String(v) = v {
                    Ok((k.to_string(), v.to_string()))
                } else {
                    Err(ArrowError::ParseError(
                        "metadata `value` field must be a string".to_string(),
                    ))
                }
            })
            .collect::<Result<_>>(),
        _ => Err(ArrowError::ParseError(
            "`metadata` field must be an object".to_string(),
        )),
    }
}
/// Checks whether this schema contains `other`.
///
/// That holds when both schemas have the same number of fields, each field
/// of `self` `contains` the field of `other` at the same position, and every
/// metadata entry of `other` is present in `self` with an equal value.
pub fn contains(&self, other: &Schema) -> bool {
    let same_width = self.fields.len() == other.fields.len();

    same_width
        && self
            .fields
            .iter()
            .zip(other.fields.iter())
            .all(|(mine, theirs)| mine.contains(theirs))
        // `self.metadata` has to be a superset of `other.metadata`.
        && other
            .metadata
            .iter()
            .all(|(key, expected)| self.metadata.get(key) == Some(expected))
}
}
/// Formats the schema as its fields' `Display` output joined by `", "`.
/// Metadata is not part of the output.
impl fmt::Display for Schema {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Assemble the full text first so the formatter gets a single write.
        let mut rendered = String::new();
        for (position, column) in self.fields.iter().enumerate() {
            if position > 0 {
                rendered.push_str(", ");
            }
            rendered.push_str(&column.to_string());
        }
        f.write_str(&rendered)
    }
}
/// One entry of the array-style metadata encoding: `Schema::from_metadata`
/// deserializes a JSON array into a list of these before building the map.
#[derive(Deserialize)]
struct MetadataKeyValue {
// Metadata key.
key: String,
// Metadata value stored under `key`.
value: String,
}