dsq_shared/
lib.rs

1//! dsq-shared: Shared types and utilities for DSQ crates
2//!
3//! This crate contains common types, traits, and utilities used across
4//! multiple DSQ crates to avoid code duplication and ensure consistency.
5//!
6//! # Features
7//!
8//! - **Common Result Type**: Standardized Result type alias
9//! - **Error Utilities**: Common error handling patterns and traits
10//! - **Version Information**: Build and version metadata
11//! - **Common Types**: Shared data structures and enums
12
/// Result type alias for DSQ operations
///
/// Shorthand for `anyhow::Result<T>`, so the error side is always
/// `anyhow::Error`.
pub type Result<T> = anyhow::Result<T>;
15
/// Version information
///
/// Read at compile time from the `CARGO_PKG_VERSION` environment variable
/// through the `env!` macro.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
18
/// Build information structure
///
/// A plain bundle of `'static` metadata describing one build. Every field is
/// public, so values are created with a struct literal. The optional fields
/// are `None` when the corresponding piece of information is not available.
///
/// Besides `Debug` and `Clone`, the type implements `PartialEq` and `Eq` so
/// that two descriptions can be compared directly (for example with
/// `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BuildInfo {
    /// Package version
    pub version: &'static str,
    /// Git commit hash (if available)
    pub git_hash: Option<&'static str>,
    /// Build timestamp (if available)
    pub build_date: Option<&'static str>,
    /// Rust compiler version (if available)
    pub rust_version: Option<&'static str>,
    /// Enabled features
    pub features: &'static [&'static str],
}
33
34impl std::fmt::Display for BuildInfo {
35    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36        writeln!(f, "dsq-shared {}", self.version)?;
37
38        if let Some(hash) = self.git_hash {
39            writeln!(f, "Git hash: {hash}")?;
40        }
41
42        if let Some(date) = self.build_date {
43            writeln!(f, "Built: {date}")?;
44        }
45
46        if let Some(rust_ver) = self.rust_version {
47            writeln!(f, "Rust: {rust_ver}")?;
48        }
49
50        if !self.features.is_empty() {
51            writeln!(f, "Features: {}", self.features.join(", "))?;
52        }
53
54        Ok(())
55    }
56}
57
58/// Common error handling utilities
59pub mod error {
60    /// Create a generic operation error
61    pub fn operation_error(msg: impl Into<String>) -> anyhow::Error {
62        anyhow::anyhow!("Operation error: {}", msg.into())
63    }
64
65    /// Create a configuration error
66    pub fn config_error(msg: impl Into<String>) -> anyhow::Error {
67        anyhow::anyhow!("Configuration error: {}", msg.into())
68    }
69}
70
71/// Core value types for data processing
72pub mod value;
73
74/// Core operations for data processing
75pub mod ops;
76
77// Re-export commonly used functions
78pub use value::is_truthy;
79
/// Small general-purpose helpers
pub mod utils {
    use std::collections::HashMap;

    /// Build a `HashMap` from an iterable of `(key, value)` pairs.
    ///
    /// When the same key occurs more than once, the value from the last
    /// occurrence is the one kept.
    pub fn hashmap<K, V, I>(pairs: I) -> HashMap<K, V>
    where
        I: IntoIterator<Item = (K, V)>,
        K: std::hash::Hash + Eq,
    {
        let mut map = HashMap::new();
        map.extend(pairs);
        map
    }

    /// Return `true` when `s` has no characters other than whitespace.
    ///
    /// The empty string counts as blank.
    #[must_use]
    pub fn is_blank(s: &str) -> bool {
        s.chars().all(char::is_whitespace)
    }

    /// Return a copy of `s` with its first character uppercased.
    ///
    /// The remainder of the string is copied unchanged. An empty input
    /// yields an empty `String`.
    #[must_use]
    pub fn capitalize_first(s: &str) -> String {
        let mut rest = s.chars();
        if let Some(head) = rest.next() {
            // Uppercasing a single char may produce several chars.
            let mut out: String = head.to_uppercase().collect();
            out.push_str(rest.as_str());
            out
        } else {
            String::new()
        }
    }
}
109
/// Shared numeric constants
pub mod constants {
    // Private byte-size units so the size constants below read as "N units".
    const KIB: usize = 1024;
    const MIB: usize = 1024 * KIB;
    const GIB: usize = 1024 * MIB;

    /// Batch size used by operations by default
    pub const DEFAULT_BATCH_SIZE: usize = 1_000;

    /// Upper bound on the batch size
    pub const MAX_BATCH_SIZE: usize = 100_000;

    /// Buffer size used for I/O operations by default (8 KiB)
    pub const DEFAULT_BUFFER_SIZE: usize = 8 * KIB;

    /// Small buffer size for specialized operations (1 KiB)
    pub const SMALL_BUFFER_SIZE: usize = KIB;

    /// Large buffer size for file operations (128 KiB)
    pub const LARGE_BUFFER_SIZE: usize = 128 * KIB;

    /// Schema inference length used for data format detection by default
    pub const DEFAULT_SCHEMA_INFERENCE_LENGTH: usize = 1_000;

    /// Deepest recursion allowed during filter execution
    pub const MAX_RECURSION_DEPTH: usize = 1_000;

    /// Batch size used by high-throughput operations by default
    pub const HIGH_THROUGHPUT_BATCH_SIZE: usize = 10_000;

    /// ADT format field separator (ASCII 31, Unit Separator)
    pub const FIELD_SEPARATOR: u8 = 0x1F;

    /// ADT format record separator (ASCII 30, Record Separator)
    pub const RECORD_SEPARATOR: u8 = 0x1E;

    /// Sample size for content detection (4 KiB)
    pub const CONTENT_SAMPLE_SIZE: usize = 4 * KIB;

    /// Memory limit applied to operations by default (1 GiB)
    pub const DEFAULT_MEMORY_LIMIT: usize = GIB;

    /// Maximum memory file size (100 MiB)
    pub const MAX_MEMORY_FILE_SIZE: usize = 100 * MIB;
}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// The crate version is expected to look like a dotted version string.
    #[test]
    fn test_version_info() {
        assert!(VERSION.contains('.'));
    }

    /// With every field populated, every line shows up in the output.
    #[test]
    fn test_build_info_display_full() {
        let rendered = BuildInfo {
            version: "1.0.0",
            git_hash: Some("abc123"),
            build_date: Some("2023-01-01"),
            rust_version: Some("1.70.0"),
            features: &["default", "serde"],
        }
        .to_string();

        for expected in [
            "dsq-shared 1.0.0",
            "Git hash: abc123",
            "Built: 2023-01-01",
            "Rust: 1.70.0",
            "Features: default, serde",
        ] {
            assert!(rendered.contains(expected));
        }
    }

    /// With nothing optional set, only the header line is rendered.
    #[test]
    fn test_build_info_display_minimal() {
        let rendered = BuildInfo {
            version: "2.0.0",
            git_hash: None,
            build_date: None,
            rust_version: None,
            features: &[],
        }
        .to_string();

        assert!(rendered.contains("dsq-shared 2.0.0"));
        for absent in ["Git hash:", "Built:", "Rust:", "Features:"] {
            assert!(!rendered.contains(absent));
        }
    }

    /// Only the fields that are present produce a line.
    #[test]
    fn test_build_info_display_partial() {
        let rendered = BuildInfo {
            version: "1.5.0",
            git_hash: Some("def456"),
            build_date: None,
            rust_version: Some("1.75.0"),
            features: &[],
        }
        .to_string();

        for expected in ["dsq-shared 1.5.0", "Git hash: def456", "Rust: 1.75.0"] {
            assert!(rendered.contains(expected));
        }
        for absent in ["Built:", "Features:"] {
            assert!(!rendered.contains(absent));
        }
    }

    /// Each error helper prefixes the message with its category.
    #[test]
    fn test_error_functions() {
        let operation = error::operation_error("test message").to_string();
        assert!(operation.contains("Operation error: test message"));

        let config = error::config_error("config issue").to_string();
        assert!(config.contains("Configuration error: config issue"));
    }

    #[test]
    fn test_utils_hashmap() {
        let map = utils::hashmap([("key1", 1), ("key2", 2)]);
        assert_eq!(map.len(), 2);
        assert_eq!(map.get("key1"), Some(&1));
        assert_eq!(map.get("key2"), Some(&2));
        assert_eq!(map.get("nonexistent"), None);
    }

    #[test]
    fn test_utils_hashmap_empty() {
        let map = utils::hashmap(std::iter::empty::<(&str, i32)>());
        assert!(map.is_empty());
    }

    #[test]
    fn test_utils_is_blank() {
        for blank in ["", "   ", "\t\n", " \t \n "] {
            assert!(utils::is_blank(blank));
        }
        for not_blank in ["hello", " hello ", "a", "0"] {
            assert!(!utils::is_blank(not_blank));
        }
    }

    #[test]
    fn test_utils_capitalize_first() {
        let cases = [
            ("hello", "Hello"),
            ("HELLO", "HELLO"),
            ("", ""),
            ("a", "A"),
            ("123", "123"),
            (" hello", " hello"),
            ("ñandu", "Ñandu"), // Unicode test
        ];
        for (input, expected) in cases {
            assert_eq!(utils::capitalize_first(input), expected);
        }
    }

    #[test]
    fn test_constants() {
        // Batch sizes and limits
        assert_eq!(constants::DEFAULT_BATCH_SIZE, 1000);
        assert_eq!(constants::MAX_BATCH_SIZE, 100_000);
        assert_eq!(constants::HIGH_THROUGHPUT_BATCH_SIZE, 10000);
        assert_eq!(constants::DEFAULT_SCHEMA_INFERENCE_LENGTH, 1000);
        assert_eq!(constants::MAX_RECURSION_DEPTH, 1000);

        // Buffer and memory sizes
        assert_eq!(constants::SMALL_BUFFER_SIZE, 1024);
        assert_eq!(constants::CONTENT_SAMPLE_SIZE, 4096);
        assert_eq!(constants::DEFAULT_BUFFER_SIZE, 8192);
        assert_eq!(constants::LARGE_BUFFER_SIZE, 128 * 1024);
        assert_eq!(constants::MAX_MEMORY_FILE_SIZE, 100 * 1024 * 1024);
        assert_eq!(constants::DEFAULT_MEMORY_LIMIT, 1024 * 1024 * 1024);

        // Separator bytes
        assert_eq!(constants::FIELD_SEPARATOR, 31u8);
        assert_eq!(constants::RECORD_SEPARATOR, 30u8);
    }
}