use std::collections::{HashMap, HashSet};
use std::hash::Hash;
/// Date unit names, all lowercase, that [`is_date_unit`] compares its (lowercased) argument against.
pub const DATE_UNITS: &[&str] = &["day", "week", "month", "quarter", "year", "year_month"];
/// Looks up an element of `seq`, allowing negative indices that count from the end.
///
/// `-1` addresses the last element, `-2` the one before it, and so on. Returns `None`
/// when the resolved position lies outside the slice (which includes every lookup on an
/// empty slice).
pub fn seq_get<T>(seq: &[T], index: isize) -> Option<&T> {
    let size = seq.len() as isize;
    // A negative index is an offset from the end of the slice.
    let position = match index {
        i if i < 0 => size + i,
        i => i,
    };
    if (0..size).contains(&position) {
        seq.get(position as usize)
    } else {
        None
    }
}
/// Conversion of a value into a `Vec` of its items.
pub trait EnsureList {
    /// Element type of the resulting vector.
    type Item;

    /// Consumes `self` and returns its contents as a vector.
    fn ensure_list(self) -> Vec<Self::Item>;
}

/// A vector is already a list, so it is handed back untouched.
impl<T> EnsureList for Vec<T> {
    type Item = T;

    fn ensure_list(self) -> Vec<T> {
        self
    }
}

/// `Some(v)` becomes a one-element vector and `None` becomes an empty one.
impl<T> EnsureList for Option<T> {
    type Item = T;

    fn ensure_list(self) -> Vec<T> {
        // An `Option` iterates over zero or one item.
        self.into_iter().collect()
    }
}
/// Wraps a single value in a one-element vector.
pub fn ensure_list<T>(value: T) -> Vec<T> {
    Vec::from([value])
}
/// Turns an optional value into a vector: one element for `Some`, empty for `None`.
pub fn ensure_list_option<T>(value: Option<T>) -> Vec<T> {
    value.map_or_else(Vec::new, |item| vec![item])
}
/// Joins the non-empty entries of `args` with `sep`.
///
/// Empty strings are dropped before joining, so they never produce doubled separators.
pub fn csv(args: &[&str], sep: &str) -> String {
    let mut joined = String::new();
    for piece in args.iter().filter(|piece| !piece.is_empty()) {
        // Every kept piece is non-empty, so a non-empty buffer means one was already written.
        if !joined.is_empty() {
            joined.push_str(sep);
        }
        joined.push_str(piece);
    }
    joined
}
/// Joins the non-empty entries of `args` using `", "` as the separator.
///
/// Thin convenience wrapper around [`csv`].
pub fn csv_default(args: &[&str]) -> String {
    const DEFAULT_SEPARATOR: &str = ", ";
    csv(args, DEFAULT_SEPARATOR)
}
/// Repeatedly applies `func` to `value` until the output equals the input.
///
/// Each round calls `func` on a clone of the current value and compares the result with
/// that value using `PartialEq`; the first result that compares equal is returned. The
/// function does not terminate if `func` never reaches such a fixed point.
pub fn while_changing<T, F>(mut value: T, func: F) -> T
where
    T: Clone + PartialEq,
    F: Fn(T) -> T,
{
    let mut next = func(value.clone());
    while next != value {
        value = next;
        next = func(value.clone());
    }
    next
}
/// Repeatedly applies `func` to `value` until its hash stops changing.
///
/// Each round hashes the value with `hasher`, transforms it with `func`, and hashes the
/// result; the first result whose hash equals the hash taken before the transformation is
/// returned. Change detection relies solely on `hasher`, so two different values with the
/// same hash are treated as unchanged.
pub fn while_changing_hash<T, F, H>(mut value: T, func: F, hasher: H) -> T
where
    F: Fn(T) -> T,
    H: Fn(&T) -> u64,
{
    loop {
        let before = hasher(&value);
        let next = func(value);
        if before == hasher(&next) {
            break next;
        }
        value = next;
    }
}
/// Topologically sorts a dependency graph.
///
/// `dag` maps every node to the set of nodes it depends on. Nodes that only appear inside
/// a dependency set are treated as nodes without dependencies of their own.
///
/// The result lists each node after all of its dependencies. Nodes are emitted in rounds:
/// every round takes all nodes whose dependencies have already been emitted, in ascending
/// (`Ord`) order, which makes the output deterministic.
///
/// # Errors
///
/// Returns [`TsortError::CycleDetected`] when the remaining nodes all still have
/// unresolved dependencies, i.e. the graph contains a cycle.
pub fn tsort<T>(mut dag: HashMap<T, HashSet<T>>) -> Result<Vec<T>, TsortError>
where
    T: Clone + Eq + Hash + Ord,
{
    let mut result = Vec::new();

    // Register nodes that are only mentioned as dependencies so they get emitted too.
    let all_deps: Vec<T> = dag.values().flat_map(|deps| deps.iter().cloned()).collect();
    for dep in all_deps {
        dag.entry(dep).or_default();
    }

    while !dag.is_empty() {
        let mut current: Vec<T> = dag
            .iter()
            .filter(|(_, deps)| deps.is_empty())
            .map(|(node, _)| node.clone())
            .collect();
        // Nodes remain but none of them is free of dependencies: there must be a cycle.
        if current.is_empty() {
            return Err(TsortError::CycleDetected);
        }
        // Hash-map iteration order is arbitrary; sorting keeps the output stable.
        current.sort();

        for node in &current {
            dag.remove(node);
        }

        // Drop the nodes emitted this round from every remaining dependency set.
        let current_set: HashSet<&T> = current.iter().collect();
        for deps in dag.values_mut() {
            deps.retain(|dep| !current_set.contains(dep));
        }

        result.extend(current);
    }

    Ok(result)
}

/// Error returned by [`tsort`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TsortError {
    /// The input graph contains a dependency cycle and cannot be ordered.
    CycleDetected,
}

impl std::fmt::Display for TsortError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TsortError::CycleDetected => write!(f, "Cycle detected in DAG"),
        }
    }
}

impl std::error::Error for TsortError {}
/// Returns a name derived from `base` that is not present in `taken`.
///
/// `base` itself is returned when it is free. Otherwise the candidates `base_2`,
/// `base_3`, ... are tried in order and the first unused one is returned.
pub fn find_new_name(taken: &HashSet<String>, base: &str) -> String {
    if !taken.contains(base) {
        return base.to_string();
    }

    // Numbering starts at 2: the bare `base` counts as the first occurrence.
    let mut suffix = 2;
    let mut candidate = format!("{}_{}", base, suffix);
    while taken.contains(&candidate) {
        suffix += 1;
        candidate = format!("{}_{}", base, suffix);
    }
    candidate
}
/// Creates a generator of sequential names that share `prefix`.
///
/// Each call of the returned closure yields `prefix` followed by a counter that starts at
/// `0` and grows by one per call: `prefix0`, `prefix1`, `prefix2`, ...
pub fn name_sequence(prefix: &str) -> impl FnMut() -> String {
    let prefix = prefix.to_owned();
    let mut next_index = 0usize;
    move || {
        let index = next_index;
        next_index += 1;
        format!("{}{}", prefix, index)
    }
}
/// Returns `true` when `text` parses as an `i64`.
pub fn is_int(text: &str) -> bool {
    let parsed: Result<i64, _> = text.parse();
    parsed.is_ok()
}
/// Returns `true` when `text` parses as an `f64`.
pub fn is_float(text: &str) -> bool {
    let parsed: Result<f64, _> = text.parse();
    parsed.is_ok()
}
/// Parses `text` as a number written with exactly `width` ASCII digits.
///
/// `str::parse` alone would also accept a leading `+` sign, so the characters are checked
/// explicitly before parsing.
fn parse_fixed_digits(text: &str, width: usize) -> Option<u32> {
    if text.len() != width || !text.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }
    text.parse().ok()
}

/// Returns `true` when `clock` has the form `HH:MM`, `HH:MM:SS`, optionally followed by
/// `.` and one or more fraction digits, with every field inside its valid range.
fn is_iso_clock_time(clock: &str) -> bool {
    let (fields, fraction) = match clock.find('.') {
        Some(dot) => (&clock[..dot], Some(&clock[dot + 1..])),
        None => (clock, None),
    };
    let fraction_ok = fraction.map_or(true, |digits| {
        !digits.is_empty() && digits.bytes().all(|byte| byte.is_ascii_digit())
    });
    if !fraction_ok {
        return false;
    }

    let mut fields = fields.split(':');
    // The trailing `None` rejects a fourth `:`-separated field.
    let (hour, minute, second) =
        match (fields.next(), fields.next(), fields.next(), fields.next()) {
            (Some(hour), Some(minute), second, None) => (hour, minute, second),
            _ => return false,
        };

    let within = |field: &str, max: u32| {
        parse_fixed_digits(field, 2).map_or(false, |value| value <= max)
    };
    within(hour, 23) && within(minute, 59) && second.map_or(true, |field| within(field, 59))
}

/// Returns `true` when `offset` is `Z` or a signed offset written as `±HH`, `±HHMM` or
/// `±HH:MM` with hours up to 23 and minutes up to 59.
fn is_iso_utc_offset(offset: &str) -> bool {
    if offset == "Z" {
        return true;
    }
    let body = match offset
        .strip_prefix('+')
        .or_else(|| offset.strip_prefix('-'))
    {
        Some(body) => body,
        None => return false,
    };
    let (hours, minutes) = match body.split_once(':') {
        Some((hours, minutes)) => (hours, Some(minutes)),
        // Compact `HHMM`; the ASCII check keeps the byte slicing on char boundaries.
        None if body.len() == 4 && body.is_ascii() => (&body[..2], Some(&body[2..])),
        None => (body, None),
    };
    let within = |field: &str, max: u32| {
        parse_fixed_digits(field, 2).map_or(false, |value| value <= max)
    };
    within(hours, 23) && minutes.map_or(true, |field| within(field, 59))
}

/// Returns `true` when `text` is a calendar date written as `YYYY-MM-DD`.
///
/// Every field must consist of ASCII digits only and have exactly the width shown. The
/// month must be 1–12 and the day must exist in that month; February has 29 days in
/// Gregorian leap years (divisible by 4 and not by 100, or divisible by 400).
pub fn is_iso_date(text: &str) -> bool {
    let mut fields = text.split('-');
    // The trailing `None` rejects anything after the day field.
    let (year, month, day) = match (fields.next(), fields.next(), fields.next(), fields.next()) {
        (Some(year), Some(month), Some(day), None) => (year, month, day),
        _ => return false,
    };
    let (year, month, day) = match (
        parse_fixed_digits(year, 4),
        parse_fixed_digits(month, 2),
        parse_fixed_digits(day, 2),
    ) {
        (Some(year), Some(month), Some(day)) => (year, month, day),
        _ => return false,
    };

    let is_leap_year = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap_year => 29,
        2 => 28,
        _ => return false,
    };
    (1..=days_in_month).contains(&day)
}

/// Returns `true` when `text` is an ISO-style date, optionally followed by a time.
///
/// Accepted shapes:
///
/// * `YYYY-MM-DD` on its own (validated by [`is_iso_date`]);
/// * the date, a `T` or a single space, then `HH:MM` or `HH:MM:SS`, optionally with a
///   `.fraction` of digits, optionally followed by `Z` or a `±HH`, `±HHMM` or `±HH:MM`
///   offset.
///
/// Never panics, including on non-ASCII input.
pub fn is_iso_datetime(text: &str) -> bool {
    // `get` returns `None` instead of panicking when byte 10 is not a char boundary.
    let (date, rest) = match (text.get(..10), text.get(10..)) {
        (Some(date), Some(rest)) => (date, rest),
        _ => return false,
    };
    if !is_iso_date(date) {
        return false;
    }
    if rest.is_empty() {
        return true;
    }

    let time = match rest.strip_prefix('T').or_else(|| rest.strip_prefix(' ')) {
        Some(time) => time,
        None => return false,
    };

    // The clock part contains only digits, `:` and `.`, so the first `Z`, `+` or `-`
    // starts the UTC-offset suffix.
    let offset_start = time.find(|c: char| c == 'Z' || c == '+' || c == '-');
    let (clock, offset) = match offset_start {
        Some(start) => (&time[..start], Some(&time[start..])),
        None => (time, None),
    };

    is_iso_clock_time(clock) && offset.map_or(true, is_iso_utc_offset)
}
/// Converts a camel-case identifier into underscore-separated, upper-case form.
///
/// An underscore is inserted before every uppercase character except the first character
/// of the input, and every character is then ASCII-uppercased, e.g. `camelCase` becomes
/// `CAMEL_CASE`.
pub fn camel_to_snake_case(name: &str) -> String {
    let mut converted = String::with_capacity(name.len() + 4);
    for letter in name.chars() {
        // The buffer is empty only while handling the very first character.
        let starts_new_word = letter.is_uppercase() && !converted.is_empty();
        if starts_new_word {
            converted.push('_');
        }
        converted.push(letter.to_ascii_uppercase());
    }
    converted
}
/// Converts an underscore-separated identifier into upper camel case.
///
/// Underscores are removed. The first character of the input and every character that
/// follows an underscore are ASCII-uppercased; all other characters are ASCII-lowercased,
/// e.g. `my_http_server` becomes `MyHttpServer`.
pub fn snake_to_camel_case(name: &str) -> String {
    let mut converted = String::with_capacity(name.len());
    // Empty segments (leading, trailing or doubled underscores) contribute nothing.
    for segment in name.split('_') {
        let mut letters = segment.chars();
        if let Some(initial) = letters.next() {
            converted.push(initial.to_ascii_uppercase());
            converted.extend(letters.map(|letter| letter.to_ascii_lowercase()));
        }
    }
    converted
}
/// Returns the nesting depth of `d`, which is always `1`.
///
/// The value type `V` is opaque here, so any maps nested inside the values cannot be
/// inspected; the map itself counts as a single level whether or not it has entries.
pub fn dict_depth<K, V>(d: &HashMap<K, V>) -> usize
where
    K: std::hash::Hash + Eq,
{
    // The contents do not influence the result; the binding only documents that.
    let _ = d;
    1
}
/// Returns the first item produced by `iter`, or `None` when it yields nothing.
///
/// The iterator is taken by value; only a single item is pulled from it.
pub fn first<I, T>(mut iter: I) -> Option<T>
where
    I: Iterator<Item = T>,
{
    Iterator::next(&mut iter)
}
/// Splits `value` on `sep` and pads the result with `None` up to `min_num_words` entries.
///
/// Every word produced by the split is returned as `Some(word)`. When there are fewer
/// than `min_num_words` words, the missing slots are filled with `None` — in front of the
/// words when `fill_from_start` is `true`, after them otherwise. Results that already have
/// at least `min_num_words` words are returned unpadded and untruncated.
pub fn split_num_words(
    value: &str,
    sep: &str,
    min_num_words: usize,
    fill_from_start: bool,
) -> Vec<Option<String>> {
    let mut words: Vec<Option<String>> = value
        .split(sep)
        .map(|word| Some(word.to_owned()))
        .collect();

    let missing = min_num_words.saturating_sub(words.len());
    if missing == 0 {
        return words;
    }

    let filler = std::iter::repeat(None).take(missing);
    if fill_from_start {
        filler.chain(words).collect()
    } else {
        words.extend(filler);
        words
    }
}
/// Concatenates the vectors in `values` into one vector, cloning every element.
///
/// Element order is preserved: all items of the first vector, then the second, and so on.
pub fn flatten<T: Clone>(values: &[Vec<T>]) -> Vec<T> {
    values.concat()
}
/// Merges overlapping `(start, end)` ranges.
///
/// The ranges are sorted by their start. A range whose start is less than or equal to the
/// end of the previously kept range is folded into it, extending that range's end when
/// necessary; any other range starts a new entry. The result is ordered by start.
pub fn merge_ranges<T: Ord + Copy>(mut ranges: Vec<(T, T)>) -> Vec<(T, T)> {
    ranges.sort_by_key(|&(start, _)| start);

    let mut merged: Vec<(T, T)> = Vec::new();
    for (start, end) in ranges {
        match merged.last_mut() {
            // Touches or overlaps the previous range: widen it instead of adding a new one.
            Some(previous) if start <= previous.1 => {
                if end >= previous.1 {
                    previous.1 = end;
                }
            }
            _ => merged.push((start, end)),
        }
    }
    merged
}
/// Returns `true` when `unit`, compared case-insensitively, is one of [`DATE_UNITS`].
pub fn is_date_unit(unit: &str) -> bool {
    let normalized = unit.to_lowercase();
    DATE_UNITS.iter().any(|known| *known == normalized)
}
/// Adds `offset` to an integer literal given as text.
///
/// * With an `offset` of `0`, `expression` is returned unchanged, whatever it contains.
/// * Otherwise `expression` must parse as an `i64`; the shifted value is returned as text.
///
/// Returns `None` when `expression` is not an integer literal, or when the sum does not
/// fit in an `i64`.
pub fn apply_index_offset(expression: &str, offset: i64) -> Option<String> {
    if offset == 0 {
        return Some(expression.to_string());
    }
    let value = expression.parse::<i64>().ok()?;
    // `checked_add` turns an overflow into `None` instead of a panic or a wrapped result.
    value.checked_add(offset).map(|shifted| shifted.to_string())
}
/// A read-only mapping in which every key is associated with the same value.
///
/// Only the key set and a single value are stored, rather than one value per key.
#[derive(Debug, Clone)]
pub struct SingleValuedMapping<K, V>
where
    K: Eq + Hash,
{
    keys: HashSet<K>,
    value: V,
}

impl<K, V> SingleValuedMapping<K, V>
where
    K: Eq + Hash,
{
    /// Creates a mapping from an existing key set and the value shared by all keys.
    pub fn new(keys: HashSet<K>, value: V) -> Self {
        SingleValuedMapping { keys, value }
    }

    /// Creates a mapping by collecting `keys` into a set; duplicate keys collapse.
    pub fn from_iter<I: IntoIterator<Item = K>>(keys: I, value: V) -> Self {
        Self::new(keys.into_iter().collect(), value)
    }

    /// Returns the shared value when `key` is part of the mapping, `None` otherwise.
    pub fn get(&self, key: &K) -> Option<&V> {
        self.keys.get(key).map(|_| &self.value)
    }

    /// Returns `true` when `key` is part of the mapping.
    pub fn contains_key(&self, key: &K) -> bool {
        self.get(key).is_some()
    }

    /// Number of keys in the mapping.
    pub fn len(&self) -> usize {
        self.keys.len()
    }

    /// Returns `true` when the mapping has no keys.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Iterates over the keys in the set's (arbitrary) order.
    pub fn keys(&self) -> impl Iterator<Item = &K> {
        self.keys.iter()
    }

    /// The value shared by all keys; available even when there are no keys.
    pub fn value(&self) -> &V {
        &self.value
    }

    /// Iterates over `(key, value)` pairs; the value reference is the same for every key.
    pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> {
        self.keys.iter().zip(std::iter::repeat(&self.value))
    }
}
/// Interprets `value` as a boolean, ignoring letter case.
///
/// `"true"` and `"1"` map to `Some(true)`, `"false"` and `"0"` to `Some(false)`; anything
/// else yields `None`.
pub fn to_bool(value: &str) -> Option<bool> {
    let normalized = value.to_lowercase();
    if normalized == "true" || normalized == "1" {
        Some(true)
    } else if normalized == "false" || normalized == "0" {
        Some(false)
    } else {
        None
    }
}
// Unit tests for the helpers defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// Positive indices count from the front, negative ones from the back; out-of-range
// indices and any lookup on an empty slice give `None`.
#[test]
fn test_seq_get() {
let v = vec![1, 2, 3, 4, 5];
assert_eq!(seq_get(&v, 0), Some(&1));
assert_eq!(seq_get(&v, 4), Some(&5));
assert_eq!(seq_get(&v, 5), None);
assert_eq!(seq_get(&v, -1), Some(&5));
assert_eq!(seq_get(&v, -5), Some(&1));
assert_eq!(seq_get(&v, -6), None);
let empty: Vec<i32> = vec![];
assert_eq!(seq_get(&empty, 0), None);
assert_eq!(seq_get(&empty, -1), None);
}
// Empty entries are dropped before joining.
#[test]
fn test_csv() {
assert_eq!(csv(&["a", "b", "c"], ", "), "a, b, c");
assert_eq!(csv(&["a", "", "c"], ", "), "a, c");
assert_eq!(csv(&["", "", ""], ", "), "");
assert_eq!(csv(&["a"], ", "), "a");
}
// Halving stops once the value is odd; an odd start value is returned as is.
#[test]
fn test_while_changing() {
let result = while_changing(16, |n| if n % 2 == 0 { n / 2 } else { n });
assert_eq!(result, 1);
let result = while_changing(5, |n| if n % 2 == 0 { n / 2 } else { n });
assert_eq!(result, 5);
}
// Dependencies come before the nodes that depend on them.
#[test]
fn test_tsort() {
let mut dag = HashMap::new();
dag.insert("a", HashSet::from(["b", "c"]));
dag.insert("b", HashSet::from(["c"]));
dag.insert("c", HashSet::new());
let sorted = tsort(dag).unwrap();
assert_eq!(sorted, vec!["c", "b", "a"]);
}
// Two nodes depending on each other form a cycle, which is reported as an error.
#[test]
fn test_tsort_cycle() {
let mut dag = HashMap::new();
dag.insert("a", HashSet::from(["b"]));
dag.insert("b", HashSet::from(["a"]));
let result = tsort(dag);
assert!(result.is_err());
}
// Taken names get the first free numeric suffix, starting at `_2`; free names are kept.
#[test]
fn test_find_new_name() {
let taken = HashSet::from(["col".to_string(), "col_2".to_string()]);
assert_eq!(find_new_name(&taken, "col"), "col_3");
assert_eq!(find_new_name(&taken, "other"), "other");
let empty = HashSet::new();
assert_eq!(find_new_name(&empty, "col"), "col");
}
// The generated names carry a counter that starts at 0.
#[test]
fn test_name_sequence() {
let mut gen = name_sequence("a");
assert_eq!(gen(), "a0");
assert_eq!(gen(), "a1");
assert_eq!(gen(), "a2");
}
// Signed integers are accepted; decimals, words and the empty string are not.
#[test]
fn test_is_int() {
assert!(is_int("123"));
assert!(is_int("-456"));
assert!(is_int("0"));
assert!(!is_int("12.34"));
assert!(!is_int("abc"));
assert!(!is_int(""));
}
// Integers and exponent notation count as floats; words and the empty string do not.
#[test]
fn test_is_float() {
assert!(is_float("12.34"));
assert!(is_float("123"));
assert!(is_float("-1.5e10"));
assert!(is_float("0.0"));
assert!(!is_float("abc"));
assert!(!is_float(""));
}
#[test]
fn test_is_iso_date() {
assert!(is_iso_date("2023-01-15"));
// 29 February exists in 2024 but not in 2023; month-first ordering is rejected.
assert!(is_iso_date("2024-02-29")); assert!(!is_iso_date("2023-02-29")); assert!(!is_iso_date("01-15-2023"));
// Month 13, day 32 and free text are all rejected.
assert!(!is_iso_date("2023-13-01")); assert!(!is_iso_date("2023-01-32")); assert!(!is_iso_date("not a date"));
}
#[test]
fn test_is_iso_datetime() {
// Both `T` and a space are accepted between date and time.
assert!(is_iso_datetime("2023-01-15T10:30:00"));
assert!(is_iso_datetime("2023-01-15 10:30:00"));
// Fractional seconds and a UTC offset are accepted.
assert!(is_iso_datetime("2023-01-15T10:30:00.123456"));
assert!(is_iso_datetime("2023-01-15T10:30:00+00:00"));
// A date without a time part also counts.
assert!(is_iso_datetime("2023-01-15"));
assert!(!is_iso_datetime("not a datetime"));
// Any other separator between date and time is rejected.
assert!(!is_iso_datetime("2023-01-15X10:30:00")); }
// Note that the converted name is upper-cased.
#[test]
fn test_camel_to_snake_case() {
assert_eq!(camel_to_snake_case("camelCase"), "CAMEL_CASE");
assert_eq!(camel_to_snake_case("PascalCase"), "PASCAL_CASE");
assert_eq!(camel_to_snake_case("simple"), "SIMPLE");
}
// The first letter of every underscore-separated word is capitalised.
#[test]
fn test_snake_to_camel_case() {
assert_eq!(snake_to_camel_case("snake_case"), "SnakeCase");
assert_eq!(snake_to_camel_case("my_http_server"), "MyHttpServer");
assert_eq!(snake_to_camel_case("simple"), "Simple");
}
#[test]
fn test_split_num_words() {
// Too few words: pad with `None` at the front ...
assert_eq!(
split_num_words("db.table", ".", 3, true),
vec![None, Some("db".to_string()), Some("table".to_string())]
);
// ... or at the back.
assert_eq!(
split_num_words("db.table", ".", 3, false),
vec![Some("db".to_string()), Some("table".to_string()), None]
);
// Exactly the requested number of words: no padding.
assert_eq!(
split_num_words("catalog.db.table", ".", 3, true),
vec![
Some("catalog".to_string()),
Some("db".to_string()),
Some("table".to_string())
]
);
// More words than the minimum: nothing is dropped.
assert_eq!(
split_num_words("db.table", ".", 1, true),
vec![Some("db".to_string()), Some("table".to_string())]
);
}
// Overlapping and nested ranges are merged; disjoint ones are kept apart.
#[test]
fn test_merge_ranges() {
assert_eq!(merge_ranges(vec![(1, 3), (2, 6)]), vec![(1, 6)]);
assert_eq!(
merge_ranges(vec![(1, 3), (2, 6), (8, 10)]),
vec![(1, 6), (8, 10)]
);
assert_eq!(merge_ranges(vec![(1, 5), (2, 3)]), vec![(1, 5)]);
assert_eq!(merge_ranges::<i32>(vec![]), vec![]);
}
// Matching ignores letter case; `hour` and `minute` are not date units.
#[test]
fn test_is_date_unit() {
assert!(is_date_unit("day"));
assert!(is_date_unit("MONTH"));
assert!(is_date_unit("Year"));
assert!(!is_date_unit("hour"));
assert!(!is_date_unit("minute"));
}
// Integer literals are shifted by the offset; non-integer text yields `None`.
#[test]
fn test_apply_index_offset() {
assert_eq!(apply_index_offset("0", 1), Some("1".to_string()));
assert_eq!(apply_index_offset("5", 1), Some("6".to_string()));
assert_eq!(apply_index_offset("10", -1), Some("9".to_string()));
assert_eq!(apply_index_offset("5", 0), Some("5".to_string()));
assert_eq!(apply_index_offset("-1", 1), Some("0".to_string()));
assert_eq!(apply_index_offset("col", 1), None);
assert_eq!(apply_index_offset("1.5", 1), None);
assert_eq!(apply_index_offset("abc", 1), None);
}
// Every known key resolves to the one shared value; unknown keys resolve to `None`.
#[test]
fn test_single_valued_mapping() {
let columns = HashSet::from(["id".to_string(), "name".to_string(), "email".to_string()]);
let mapping = SingleValuedMapping::new(columns, "users".to_string());
assert_eq!(mapping.get(&"id".to_string()), Some(&"users".to_string()));
assert_eq!(mapping.get(&"name".to_string()), Some(&"users".to_string()));
assert_eq!(
mapping.get(&"email".to_string()),
Some(&"users".to_string())
);
assert_eq!(mapping.get(&"unknown".to_string()), None);
assert_eq!(mapping.len(), 3);
assert!(!mapping.is_empty());
assert!(mapping.contains_key(&"id".to_string()));
assert!(!mapping.contains_key(&"unknown".to_string()));
assert_eq!(mapping.value(), &"users".to_string());
}
// Building the mapping from any iterable of keys.
#[test]
fn test_single_valued_mapping_from_iter() {
let mapping = SingleValuedMapping::from_iter(vec!["a".to_string(), "b".to_string()], 42);
assert_eq!(mapping.get(&"a".to_string()), Some(&42));
assert_eq!(mapping.get(&"b".to_string()), Some(&42));
assert_eq!(mapping.len(), 2);
}
// `true`/`false` in any letter case and `1`/`0` are recognised; other words are not.
#[test]
fn test_to_bool() {
assert_eq!(to_bool("true"), Some(true));
assert_eq!(to_bool("TRUE"), Some(true));
assert_eq!(to_bool("True"), Some(true));
assert_eq!(to_bool("1"), Some(true));
assert_eq!(to_bool("false"), Some(false));
assert_eq!(to_bool("FALSE"), Some(false));
assert_eq!(to_bool("False"), Some(false));
assert_eq!(to_bool("0"), Some(false));
assert_eq!(to_bool("maybe"), None);
assert_eq!(to_bool("yes"), None);
assert_eq!(to_bool("no"), None);
}
}