scribe_core/
utils.rs

1//! Utility functions and helpers for the Scribe library.
2//!
3//! Provides common functionality used across the Scribe ecosystem,
4//! including path manipulation, string processing, and validation.
5
6use crate::error::{Result, ScribeError};
7use std::path::{Path, PathBuf};
8use std::time::{Duration, SystemTime, UNIX_EPOCH};
9
10/// Path utility functions
11pub mod path {
12    use super::*;
13
14    /// Normalize a path to use forward slashes (for cross-platform consistency)
15    pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
16        path.as_ref().to_string_lossy().replace('\\', "/")
17    }
18
19    /// Get relative path from base to target
20    pub fn relative_path<P1: AsRef<Path>, P2: AsRef<Path>>(
21        base: P1,
22        target: P2,
23    ) -> Result<PathBuf> {
24        let base_ref = base.as_ref();
25        let target_ref = target.as_ref();
26        let base = base_ref.canonicalize().map_err(|e| {
27            ScribeError::path_with_source("Failed to canonicalize base path", base_ref, e)
28        })?;
29        let target = target_ref.canonicalize().map_err(|e| {
30            ScribeError::path_with_source("Failed to canonicalize target path", target_ref, e)
31        })?;
32
33        target
34            .strip_prefix(&base)
35            .map(|p| p.to_path_buf())
36            .map_err(|_| ScribeError::path("Target path is not under base path", &target))
37    }
38
39    /// Check if path is under a given directory
40    pub fn is_under_directory<P1: AsRef<Path>, P2: AsRef<Path>>(path: P1, directory: P2) -> bool {
41        match relative_path(directory, path) {
42            Ok(rel_path) => !rel_path.to_string_lossy().starts_with(".."),
43            Err(_) => false,
44        }
45    }
46
47    /// Get the depth of a path (number of components)
48    pub fn path_depth<P: AsRef<Path>>(path: P) -> usize {
49        path.as_ref().components().count()
50    }
51
52    /// Check if path represents a hidden file or directory (starts with .)
53    pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
54        path.as_ref()
55            .file_name()
56            .and_then(|name| name.to_str())
57            .map(|name| name.starts_with('.'))
58            .unwrap_or(false)
59    }
60
61    /// Ensure directory exists, creating if necessary
62    pub fn ensure_dir_exists<P: AsRef<Path>>(path: P) -> Result<()> {
63        let path = path.as_ref();
64        if !path.exists() {
65            std::fs::create_dir_all(path).map_err(|e| {
66                ScribeError::path_with_source("Failed to create directory", path, e)
67            })?;
68        } else if !path.is_dir() {
69            return Err(ScribeError::path(
70                "Path exists but is not a directory",
71                path,
72            ));
73        }
74        Ok(())
75    }
76
77    /// Find the repository root by looking for common markers (.git, Cargo.toml, etc.)
78    pub fn find_repo_root<P: AsRef<Path>>(start_path: P) -> Option<PathBuf> {
79        const REPO_MARKERS: &[&str] = &[
80            ".git",
81            "Cargo.toml",
82            "package.json",
83            "pyproject.toml",
84            "setup.py",
85            "go.mod",
86            "pom.xml",
87            "build.gradle",
88            "Makefile",
89        ];
90
91        let mut current = start_path.as_ref();
92
93        loop {
94            for marker in REPO_MARKERS {
95                if current.join(marker).exists() {
96                    return Some(current.to_path_buf());
97                }
98            }
99
100            match current.parent() {
101                Some(parent) => current = parent,
102                None => return None,
103            }
104        }
105    }
106}
107
108/// String processing utilities
109pub mod string {
110    use super::*;
111
112    /// Truncate a string to a maximum length, adding ellipsis if needed
113    pub fn truncate(s: &str, max_len: usize) -> String {
114        if s.len() <= max_len {
115            s.to_string()
116        } else if max_len <= 3 {
117            "...".to_string()
118        } else {
119            format!("{}...", &s[..max_len - 3])
120        }
121    }
122
123    /// Remove common leading whitespace from all lines
124    pub fn dedent(s: &str) -> String {
125        let lines: Vec<&str> = s.lines().collect();
126        if lines.is_empty() {
127            return String::new();
128        }
129
130        // Find minimum indentation (ignoring empty lines)
131        let min_indent = lines
132            .iter()
133            .filter(|line| !line.trim().is_empty())
134            .map(|line| line.len() - line.trim_start().len())
135            .min()
136            .unwrap_or(0);
137
138        lines
139            .iter()
140            .map(|line| {
141                if line.trim().is_empty() {
142                    String::new()
143                } else {
144                    line.chars().skip(min_indent).collect()
145                }
146            })
147            .collect::<Vec<String>>()
148            .join("\n")
149    }
150
151    /// Count lines in a string
152    pub fn count_lines(s: &str) -> usize {
153        if s.is_empty() {
154            0
155        } else {
156            s.matches('\n').count() + 1
157        }
158    }
159
160    /// Check if string is likely binary content
161    pub fn is_likely_binary(s: &str) -> bool {
162        // Check for null bytes or high proportion of non-printable characters
163        let null_bytes = s.bytes().filter(|&b| b == 0).count();
164        if null_bytes > 0 {
165            return true;
166        }
167
168        let total_chars = s.chars().count();
169        if total_chars == 0 {
170            return false;
171        }
172
173        let non_printable = s
174            .chars()
175            .filter(|&c| {
176                !c.is_ascii_graphic()
177                    && !c.is_ascii_whitespace()
178                    && c != '\n'
179                    && c != '\r'
180                    && c != '\t'
181            })
182            .count();
183
184        // If more than 30% non-printable characters, likely binary
185        (non_printable as f64 / total_chars as f64) > 0.3
186    }
187
188    /// Extract identifier/name from a file path
189    pub fn extract_identifier<P: AsRef<Path>>(path: P) -> String {
190        path.as_ref()
191            .file_stem()
192            .and_then(|stem| stem.to_str())
193            .unwrap_or("unknown")
194            .to_string()
195    }
196
197    /// Convert snake_case to camelCase
198    pub fn snake_to_camel(s: &str) -> String {
199        let mut result = String::new();
200        let mut capitalize_next = false;
201
202        for c in s.chars() {
203            if c == '_' {
204                capitalize_next = true;
205            } else if capitalize_next {
206                result.push(c.to_uppercase().next().unwrap_or(c));
207                capitalize_next = false;
208            } else {
209                result.push(c);
210            }
211        }
212
213        result
214    }
215
216    /// Convert camelCase to snake_case
217    pub fn camel_to_snake(s: &str) -> String {
218        let mut result = String::new();
219
220        for (i, c) in s.char_indices() {
221            if c.is_uppercase() && i > 0 {
222                result.push('_');
223            }
224            result.push(c.to_lowercase().next().unwrap_or(c));
225        }
226
227        result
228    }
229}
230
231/// Time and duration utilities
232pub mod time {
233    use super::*;
234
235    /// Convert Duration to human-readable string
236    pub fn duration_to_human(duration: Duration) -> String {
237        let total_secs = duration.as_secs();
238        let millis = duration.subsec_millis();
239
240        if total_secs >= 3600 {
241            let hours = total_secs / 3600;
242            let mins = (total_secs % 3600) / 60;
243            let secs = total_secs % 60;
244            format!("{}h {}m {}s", hours, mins, secs)
245        } else if total_secs >= 60 {
246            let mins = total_secs / 60;
247            let secs = total_secs % 60;
248            format!("{}m {}s", mins, secs)
249        } else if total_secs > 0 {
250            format!("{}.{:03}s", total_secs, millis)
251        } else {
252            format!("{}ms", millis)
253        }
254    }
255
256    /// Get current timestamp as seconds since Unix epoch
257    pub fn current_timestamp() -> u64 {
258        SystemTime::now()
259            .duration_since(UNIX_EPOCH)
260            .unwrap_or_default()
261            .as_secs()
262    }
263
264    /// Convert SystemTime to timestamp
265    pub fn system_time_to_timestamp(time: SystemTime) -> u64 {
266        time.duration_since(UNIX_EPOCH)
267            .unwrap_or_default()
268            .as_secs()
269    }
270
271    /// Convert timestamp to SystemTime
272    pub fn timestamp_to_system_time(timestamp: u64) -> SystemTime {
273        UNIX_EPOCH + Duration::from_secs(timestamp)
274    }
275}
276
277/// Collection utilities
278pub mod collections {
279    use std::collections::HashMap;
280    use std::hash::Hash;
281
282    /// Count occurrences of items in an iterator
283    pub fn count_occurrences<T, I>(iter: I) -> HashMap<T, usize>
284    where
285        T: Eq + Hash,
286        I: Iterator<Item = T>,
287    {
288        let mut counts = HashMap::new();
289        for item in iter {
290            *counts.entry(item).or_insert(0) += 1;
291        }
292        counts
293    }
294
295    /// Find the most common item in an iterator
296    pub fn most_common<T, I>(iter: I) -> Option<T>
297    where
298        T: Eq + Hash + Clone,
299        I: Iterator<Item = T>,
300    {
301        let counts = count_occurrences(iter);
302        counts
303            .into_iter()
304            .max_by_key(|(_, count)| *count)
305            .map(|(item, _)| item)
306    }
307
308    /// Group items by a key function
309    pub fn group_by<T, K, F>(items: Vec<T>, key_fn: F) -> HashMap<K, Vec<T>>
310    where
311        K: Eq + Hash,
312        F: Fn(&T) -> K,
313    {
314        let mut groups = HashMap::new();
315        for item in items {
316            let key = key_fn(&item);
317            groups.entry(key).or_insert_with(Vec::new).push(item);
318        }
319        groups
320    }
321}
322
323/// Math and statistics utilities
324pub mod math {
325    /// Calculate mean of a slice of numbers
326    pub fn mean(values: &[f64]) -> f64 {
327        if values.is_empty() {
328            0.0
329        } else {
330            values.iter().sum::<f64>() / values.len() as f64
331        }
332    }
333
334    /// Calculate median of a slice of numbers
335    pub fn median(values: &mut [f64]) -> f64 {
336        if values.is_empty() {
337            return 0.0;
338        }
339
340        values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
341        let mid = values.len() / 2;
342
343        if values.len() % 2 == 0 {
344            (values[mid - 1] + values[mid]) / 2.0
345        } else {
346            values[mid]
347        }
348    }
349
350    /// Calculate standard deviation
351    pub fn std_deviation(values: &[f64]) -> f64 {
352        if values.len() < 2 {
353            return 0.0;
354        }
355
356        let mean_val = mean(values);
357        let variance = values
358            .iter()
359            .map(|x| {
360                let diff = x - mean_val;
361                diff * diff
362            })
363            .sum::<f64>()
364            / values.len() as f64;
365
366        variance.sqrt()
367    }
368
369    /// Normalize values to 0-1 range
370    pub fn normalize(values: &mut [f64]) {
371        if values.is_empty() {
372            return;
373        }
374
375        let min_val = values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
376        let max_val = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
377        let range = max_val - min_val;
378
379        if range == 0.0 {
380            // All values are the same
381            values.iter_mut().for_each(|x| *x = 0.0);
382        } else {
383            values.iter_mut().for_each(|x| *x = (*x - min_val) / range);
384        }
385    }
386
387    /// Clamp a value between min and max
388    pub fn clamp(value: f64, min: f64, max: f64) -> f64 {
389        value.max(min).min(max)
390    }
391}
392
393/// Validation utilities
394pub mod validation {
395    use super::*;
396
397    /// Validate that a path exists and is readable
398    pub fn validate_readable_path<P: AsRef<Path>>(path: P) -> Result<()> {
399        let path = path.as_ref();
400        if !path.exists() {
401            return Err(ScribeError::path("Path does not exist", path));
402        }
403
404        // Try to read metadata to check if readable
405        std::fs::metadata(path)
406            .map_err(|e| ScribeError::path_with_source("Path is not readable", path, e))?;
407
408        Ok(())
409    }
410
411    /// Validate that a path is a directory
412    pub fn validate_directory<P: AsRef<Path>>(path: P) -> Result<()> {
413        let path = path.as_ref();
414        validate_readable_path(path)?;
415
416        if !path.is_dir() {
417            return Err(ScribeError::path("Path is not a directory", path));
418        }
419
420        Ok(())
421    }
422
423    /// Validate that a path is a file
424    pub fn validate_file<P: AsRef<Path>>(path: P) -> Result<()> {
425        let path = path.as_ref();
426        validate_readable_path(path)?;
427
428        if !path.is_file() {
429            return Err(ScribeError::path("Path is not a file", path));
430        }
431
432        Ok(())
433    }
434
435    /// Validate configuration values
436    pub fn validate_config_value<T>(value: T, min: T, max: T, field_name: &str) -> Result<T>
437    where
438        T: PartialOrd + std::fmt::Display + Copy,
439    {
440        if value < min || value > max {
441            return Err(ScribeError::config_field(
442                format!("{} must be between {} and {}", field_name, min, max),
443                field_name,
444            ));
445        }
446        Ok(value)
447    }
448}
449
450/// Hash generation utilities  
451pub mod hash {
452    use std::collections::hash_map::DefaultHasher;
453    use std::hash::{Hash, Hasher};
454
455    /// Generate a hash string from a hashable value
456    pub fn generate_hash<T: Hash>(value: &T) -> String {
457        let mut hasher = DefaultHasher::new();
458        value.hash(&mut hasher);
459        format!("{:x}", hasher.finish())
460    }
461
462    /// Generate a hash from file contents
463    pub fn hash_file_content(content: &str) -> String {
464        generate_hash(&content)
465    }
466
467    /// Generate a hash from multiple values
468    pub fn hash_multiple<T: Hash>(values: &[T]) -> String {
469        let mut hasher = DefaultHasher::new();
470        for value in values {
471            value.hash(&mut hasher);
472        }
473        format!("{:x}", hasher.finish())
474    }
475}
476
#[cfg(test)]
mod tests {
    use super::*;

    /// Backslash separators are rewritten to forward slashes.
    #[test]
    fn test_path_normalize() {
        assert_eq!(path::normalize_path(r"src\lib\mod.rs"), "src/lib/mod.rs");
    }

    /// Depth equals the number of path components.
    #[test]
    fn test_path_depth() {
        let cases = [
            ("file.txt", 1),
            ("src/lib.rs", 2),
            ("src/nested/deep/file.rs", 4),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(path::path_depth(input), *expected, "depth of {:?}", input);
        }
    }

    /// Only the final component decides whether a path is hidden.
    #[test]
    fn test_is_hidden() {
        // `.cargo` on its own is a hidden directory.
        for hidden in [".gitignore", ".cargo"].iter() {
            assert!(path::is_hidden(hidden), "{:?} should be hidden", hidden);
        }
        // `.cargo/config` ends in a visible component, so it is not hidden.
        for visible in ["src/lib.rs", "README.md", ".cargo/config"].iter() {
            assert!(!path::is_hidden(visible), "{:?} should be visible", visible);
        }
    }

    /// Truncation keeps short strings and appends an ellipsis to long ones.
    #[test]
    fn test_string_truncate() {
        let cases = [
            ("hello", 10, "hello"),
            ("hello world", 8, "hello..."),
            ("hi", 2, "hi"),
            ("hello", 3, "..."),
        ];
        for (input, max_len, expected) in cases.iter() {
            assert_eq!(string::truncate(input, *max_len), *expected);
        }
    }

    /// The common indentation is stripped; deeper indentation is preserved.
    #[test]
    fn test_string_dedent() {
        let dedented = string::dedent("    line 1\n    line 2\n        line 3");
        assert_eq!(dedented, "line 1\nline 2\n    line 3");
    }

    /// Line counting is newline count plus one, except for empty input.
    #[test]
    fn test_count_lines() {
        let cases = [
            ("", 0),
            ("single line", 1),
            ("line 1\nline 2", 2),
            ("line 1\nline 2\n", 3),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(string::count_lines(input), *expected, "lines in {:?}", input);
        }
    }

    /// snake_case and camelCase convert into each other.
    #[test]
    fn test_case_conversion() {
        let to_camel = [("hello_world", "helloWorld"), ("test_case_name", "testCaseName")];
        for (snake, camel) in to_camel.iter() {
            assert_eq!(string::snake_to_camel(snake), *camel);
        }

        let to_snake = [("helloWorld", "hello_world"), ("TestCaseName", "test_case_name")];
        for (camel, snake) in to_snake.iter() {
            assert_eq!(string::camel_to_snake(camel), *snake);
        }
    }

    /// Plain text is not binary; NUL bytes and control-heavy input are.
    #[test]
    fn test_binary_detection() {
        for text in ["Hello world", "let x = 42;\nfn main() {}"].iter() {
            assert!(!string::is_likely_binary(text));
        }
        assert!(string::is_likely_binary("Hello\x00world"));

        // Two out of every three characters are a control character.
        let mostly_non_printable: String = (0..100)
            .map(|i| match i % 3 {
                0 => 'a',
                _ => '\x01',
            })
            .collect();
        assert!(string::is_likely_binary(&mostly_non_printable));
    }

    /// Each magnitude range uses its own output format.
    #[test]
    fn test_duration_formatting() {
        let cases = [
            (Duration::from_millis(500), "500ms"),
            (Duration::from_secs(5), "5.000s"),
            (Duration::from_secs(65), "1m 5s"),
            (Duration::from_secs(3661), "1h 1m 1s"),
        ];
        for (duration, expected) in cases.iter() {
            assert_eq!(time::duration_to_human(*duration), *expected);
        }
    }

    /// Mean, median, standard deviation and clamp on a small sample.
    #[test]
    fn test_math_functions() {
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(math::mean(&values), 3.0);

        let mut sortable = values.clone();
        assert_eq!(math::median(&mut sortable), 3.0);

        // Standard deviation of [1,2,3,4,5] is sqrt(2) ≈ 1.4142
        let deviation = math::std_deviation(&values);
        assert!((deviation - 1.4142135623730951).abs() < 1e-10);

        let clamp_cases = [(5.0, 4.0), (1.0, 2.0), (3.0, 3.0)];
        for (input, expected) in clamp_cases.iter() {
            assert_eq!(math::clamp(*input, 2.0, 4.0), *expected);
        }
    }

    /// Occurrence counting and most-common lookup agree on the same data.
    #[test]
    fn test_collections_utilities() {
        let items = vec!['a', 'b', 'a', 'c', 'a'];

        let counts = collections::count_occurrences(items.iter().cloned());
        for (item, expected) in [('a', 3), ('b', 1), ('c', 1)].iter() {
            assert_eq!(counts[item], *expected);
        }

        let winner = collections::most_common(items.iter().cloned());
        assert_eq!(winner, Some('a'));
    }

    /// Equal inputs hash equally; different inputs hash differently.
    #[test]
    fn test_hash_generation() {
        let first = hash::generate_hash(&"test string");
        let repeat = hash::generate_hash(&"test string");
        let other = hash::generate_hash(&"different string");

        assert_eq!(first, repeat);
        assert_ne!(first, other);
    }
}