use std::{fmt, hash::Hash, ops::Deref, sync::Arc};
use polars::prelude::{Expr, PlSmallStr, Selector, col};
use serde::{Deserialize, Serialize};
/// Strongly typed wrapper around a `String` holding a "source" column name.
///
/// Being a distinct type from `CanonicalColumnName`, the compiler prevents the
/// two kinds of name from being mixed up accidentally.
/// NOTE(review): presumably this is the column name as it appears in the raw
/// input data, before any renaming — confirm with the callers.
///
/// `#[serde(transparent)]` makes the value (de)serialize exactly like the
/// inner string, without an enclosing wrapper.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct SourceColumnName(String);
impl SourceColumnName {
pub fn new(name: impl Into<String>) -> Self {
Self(name.into())
}
pub fn as_str(&self) -> &str {
&self.0
}
pub fn into_inner(self) -> String {
self.0
}
}
/// Lets a `SourceColumnName` be used wherever a `&str` is expected, so `str`
/// methods can be called on it directly.
impl Deref for SourceColumnName {
    type Target = str;

    fn deref(&self) -> &str {
        self.0.as_str()
    }
}
/// Cheap borrowed view of the wrapped name for APIs taking `impl AsRef<str>`.
impl AsRef<str> for SourceColumnName {
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
/// Renders the name with a `Source: ` prefix so the kind of column name is
/// visible in formatted output.
impl fmt::Display for SourceColumnName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("Source: {}", self.0))
    }
}
impl From<&str> for SourceColumnName {
fn from(s: &str) -> Self {
Self(s.to_string())
}
}
impl From<String> for SourceColumnName {
fn from(s: String) -> Self {
Self(s)
}
}
impl From<PlSmallStr> for SourceColumnName {
fn from(s: PlSmallStr) -> Self {
Self(s.to_string())
}
}
/// Converts an owned `SourceColumnName` into a polars `PlSmallStr`.
///
/// The value is consumed, so the inner `String` is handed over with
/// `PlSmallStr::from_string` instead of being borrowed and copied again.
impl From<SourceColumnName> for PlSmallStr {
    fn from(c: SourceColumnName) -> Self {
        PlSmallStr::from_string(c.0)
    }
}
impl From<SourceColumnName> for Selector {
fn from(c: SourceColumnName) -> Self {
Selector::ByName {
names: Arc::from(vec![PlSmallStr::from_string(c.0)]),
strict: false,
}
}
}
/// Builds a by-name polars `Selector` from a borrowed source column name,
/// copying the text so the original stays usable.
///
/// `strict` is set to `false`.
/// NOTE(review): presumably a non-strict selector tolerates a missing column
/// instead of raising an error — confirm against the polars documentation.
impl From<&SourceColumnName> for Selector {
    fn from(c: &SourceColumnName) -> Self {
        let single = PlSmallStr::from(c.as_str());
        let names = Arc::from(vec![single]);
        Selector::ByName {
            names,
            strict: false,
        }
    }
}
/// Strongly typed wrapper around a `String` holding a "canonical" column name.
///
/// Being a distinct type from `SourceColumnName`, the compiler prevents the
/// two kinds of name from being mixed up accidentally.
/// NOTE(review): presumably this is the normalized name a source column is
/// mapped to — confirm with the callers.
///
/// The derived `Default` yields an empty name. `#[serde(transparent)]` makes
/// the value (de)serialize exactly like the inner string.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct CanonicalColumnName(String);
impl CanonicalColumnName {
pub fn new(name: impl Into<String>) -> Self {
Self(name.into())
}
pub fn as_str(&self) -> &str {
&self.0
}
pub fn into_inner(self) -> String {
self.0
}
}
/// Lets a `CanonicalColumnName` be used wherever a `&str` is expected, so
/// `str` methods can be called on it directly.
impl Deref for CanonicalColumnName {
    type Target = str;

    fn deref(&self) -> &str {
        self.0.as_str()
    }
}
/// Cheap borrowed view of the wrapped name for APIs taking `impl AsRef<str>`.
impl AsRef<str> for CanonicalColumnName {
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
/// Renders the name with a `Canonical: ` prefix so the kind of column name is
/// visible in formatted output.
impl fmt::Display for CanonicalColumnName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("Canonical: {}", self.0))
    }
}
impl From<&str> for CanonicalColumnName {
fn from(s: &str) -> Self {
Self(s.to_string())
}
}
impl From<String> for CanonicalColumnName {
fn from(s: String) -> Self {
Self(s)
}
}
impl From<CanonicalColumnName> for String {
fn from(c: CanonicalColumnName) -> Self {
c.0
}
}
impl From<PlSmallStr> for CanonicalColumnName {
fn from(s: PlSmallStr) -> Self {
Self(s.to_string())
}
}
impl From<&CanonicalColumnName> for PlSmallStr {
fn from(c: &CanonicalColumnName) -> Self {
PlSmallStr::from(c.as_str())
}
}
/// Converts an owned `CanonicalColumnName` into a polars `PlSmallStr`.
///
/// The value is consumed, so the inner `String` is handed over with
/// `PlSmallStr::from_string` instead of being borrowed and copied again.
impl From<CanonicalColumnName> for PlSmallStr {
    fn from(c: CanonicalColumnName) -> Self {
        PlSmallStr::from_string(c.0)
    }
}
impl From<CanonicalColumnName> for Selector {
fn from(c: CanonicalColumnName) -> Self {
Selector::ByName {
names: Arc::from(vec![PlSmallStr::from_string(c.0)]),
strict: false,
}
}
}
/// Builds a by-name polars `Selector` from a borrowed canonical column name,
/// copying the text so the original stays usable.
///
/// `strict` is set to `false`.
/// NOTE(review): presumably a non-strict selector tolerates a missing column
/// instead of raising an error — confirm against the polars documentation.
impl From<&CanonicalColumnName> for Selector {
    fn from(c: &CanonicalColumnName) -> Self {
        let single = PlSmallStr::from(c.as_str());
        let names = Arc::from(vec![single]);
        Selector::ByName {
            names,
            strict: false,
        }
    }
}
impl From<CanonicalColumnName> for Expr {
fn from(c: CanonicalColumnName) -> Self {
col(&c.0)
}
}
impl From<CanonicalColumnName> for Vec<String> {
fn from(c: CanonicalColumnName) -> Self {
vec![c.0]
}
}
impl From<CanonicalColumnName> for Option<Vec<String>> {
fn from(c: CanonicalColumnName) -> Self {
Some(vec![c.0])
}
}
/// Set of canonical columns describing a dataset's domain: one subject
/// column, an optional time column, and a list of component columns.
///
/// Equality and hashing are derived, so two signatures are equal only when
/// all three fields match, including the order of `components`.
/// NOTE(review): the exact domain meaning of "subject" and "components" is
/// not visible in this file — confirm against the callers.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct DomainSignature {
    /// Canonical name of the subject column.
    pub subject: CanonicalColumnName,
    /// Canonical name of the time column, or `None` when there is no time axis.
    pub time: Option<CanonicalColumnName>,
    /// Canonical names of the component columns.
    pub components: Vec<CanonicalColumnName>,
}
impl DomainSignature {
    /// Creates a signature that has only a subject column: no time column and
    /// no components.
    pub fn quality(subject: impl Into<String>) -> Self {
        let subject = CanonicalColumnName::new(subject);
        Self {
            subject,
            time: None,
            components: Vec::new(),
        }
    }

    /// Creates a signature with a subject column and a time column, and no
    /// components.
    pub fn measurement(subject: impl Into<String>, time: impl Into<String>) -> Self {
        let mut signature = Self::quality(subject);
        signature.time = Some(CanonicalColumnName::new(time));
        signature
    }

    /// Replaces the component list with `components`, stored in sorted order.
    ///
    /// Sorting first means two signatures built from the same names in a
    /// different order compare equal. Duplicates are kept as given.
    pub fn with_components(self, mut components: Vec<String>) -> Self {
        components.sort();
        let components = components
            .into_iter()
            .map(CanonicalColumnName::new)
            .collect();
        Self { components, ..self }
    }

    /// Returns `true` when both signatures are identical in subject, time and
    /// components.
    pub fn can_stack_with(&self, other: &DomainSignature) -> bool {
        PartialEq::eq(self, other)
    }

    /// Returns `true` when both signatures share the same subject column;
    /// time and components are not considered.
    pub fn can_join_with(&self, other: &DomainSignature) -> bool {
        let (mine, theirs) = (&self.subject, &other.subject);
        mine == theirs
    }

    /// Returns `true` when subject and time match and `other` has strictly
    /// more components than `self`.
    ///
    /// Only the component counts are compared, not the component names.
    pub fn needs_component_reduction(&self, other: &DomainSignature) -> bool {
        if self.subject != other.subject || self.time != other.time {
            return false;
        }
        self.components.len() < other.components.len()
    }
}
/// Extension trait adding shorthand conversions from a plain value into the
/// typed column-name wrappers.
pub trait ColumnNameExt {
    /// Consumes `self` and wraps it as a `CanonicalColumnName`.
    fn canonical(self) -> CanonicalColumnName;
    /// Consumes `self` and wraps it as a `SourceColumnName`.
    fn source(self) -> SourceColumnName;
}
/// Shorthand conversions for string slices; the text is copied into the
/// resulting wrapper.
impl ColumnNameExt for &str {
    fn canonical(self) -> CanonicalColumnName {
        let owned = self.to_owned();
        CanonicalColumnName::new(owned)
    }

    fn source(self) -> SourceColumnName {
        let owned = self.to_owned();
        SourceColumnName::new(owned)
    }
}
/// Shorthand conversions for owned strings; the `String` is moved into the
/// resulting wrapper without copying.
impl ColumnNameExt for String {
    fn canonical(self) -> CanonicalColumnName {
        CanonicalColumnName::from(self)
    }

    fn source(self) -> SourceColumnName {
        SourceColumnName::from(self)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Accessors, deref and display formatting of a source column name.
    #[test]
    fn test_source_column_name() {
        let name = SourceColumnName::new("pump_station_id");
        assert_eq!(name.as_str(), "pump_station_id");
        assert_eq!(&*name, "pump_station_id");
        assert_eq!(name.to_string(), "Source: pump_station_id");
    }

    /// Accessors, deref and display formatting of a canonical column name.
    #[test]
    fn test_canonical_column_name() {
        let name = CanonicalColumnName::new("station");
        assert_eq!(name.as_str(), "station");
        assert_eq!(&*name, "station");
        assert_eq!(name.to_string(), "Canonical: station");
    }

    /// Column names compare by their text.
    #[test]
    fn test_column_name_equality() {
        let first = SourceColumnName::new("col_a");
        let same = SourceColumnName::new("col_a");
        let different = SourceColumnName::new("col_c");
        assert_eq!(first, same);
        assert_ne!(first, different);
    }

    /// Identical signatures can be stacked.
    #[test]
    fn test_domain_signature_stack() {
        let build = || {
            DomainSignature::measurement("station", "time")
                .with_components(vec![String::from("color")])
        };
        let (sig1, sig2) = (build(), build());
        assert!(sig1.can_stack_with(&sig2));
    }

    /// Differing components block stacking but still allow a join, and the
    /// signature with fewer components reports that a reduction is needed.
    #[test]
    fn test_domain_signature_join() {
        let sig1 = DomainSignature::measurement("station", "time");
        let sig2 = DomainSignature::measurement("station", "time")
            .with_components(vec![String::from("sensor_type")]);
        assert!(!sig1.can_stack_with(&sig2));
        assert!(sig1.can_join_with(&sig2));
        assert!(sig1.needs_component_reduction(&sig2));
    }

    /// A signature without time joins with, but does not stack with, one that
    /// has a time column.
    #[test]
    fn test_domain_signature_quality_vs_measurement() {
        let quality_sig = DomainSignature::quality("station");
        let measurement_sig = DomainSignature::measurement("station", "time");
        assert!(!quality_sig.can_stack_with(&measurement_sig));
        assert!(quality_sig.can_join_with(&measurement_sig));
    }
}