scribe_core/
utils.rs

1//! Utility functions and helpers for the Scribe library.
2//!
3//! Provides common functionality used across the Scribe ecosystem,
4//! including path manipulation, string processing, and validation.
5
6use std::path::{Path, PathBuf};
7use std::time::{Duration, SystemTime, UNIX_EPOCH};
8use crate::error::{Result, ScribeError};
9
10/// Path utility functions
11pub mod path {
12    use super::*;
13
14    /// Normalize a path to use forward slashes (for cross-platform consistency)
15    pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
16        path.as_ref()
17            .to_string_lossy()
18            .replace('\\', "/")
19    }
20
21    /// Get relative path from base to target
22    pub fn relative_path<P1: AsRef<Path>, P2: AsRef<Path>>(
23        base: P1,
24        target: P2,
25    ) -> Result<PathBuf> {
26        let base_ref = base.as_ref();
27        let target_ref = target.as_ref();
28        let base = base_ref.canonicalize()
29            .map_err(|e| ScribeError::path_with_source("Failed to canonicalize base path", base_ref, e))?;
30        let target = target_ref.canonicalize()
31            .map_err(|e| ScribeError::path_with_source("Failed to canonicalize target path", target_ref, e))?;
32
33        target.strip_prefix(&base)
34            .map(|p| p.to_path_buf())
35            .map_err(|_| ScribeError::path(
36                "Target path is not under base path", 
37                &target
38            ))
39    }
40
41    /// Check if path is under a given directory
42    pub fn is_under_directory<P1: AsRef<Path>, P2: AsRef<Path>>(
43        path: P1,
44        directory: P2,
45    ) -> bool {
46        match relative_path(directory, path) {
47            Ok(rel_path) => !rel_path.to_string_lossy().starts_with(".."),
48            Err(_) => false,
49        }
50    }
51
52    /// Get the depth of a path (number of components)
53    pub fn path_depth<P: AsRef<Path>>(path: P) -> usize {
54        path.as_ref().components().count()
55    }
56
57    /// Check if path represents a hidden file or directory (starts with .)
58    pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
59        path.as_ref()
60            .file_name()
61            .and_then(|name| name.to_str())
62            .map(|name| name.starts_with('.'))
63            .unwrap_or(false)
64    }
65
66    /// Ensure directory exists, creating if necessary
67    pub fn ensure_dir_exists<P: AsRef<Path>>(path: P) -> Result<()> {
68        let path = path.as_ref();
69        if !path.exists() {
70            std::fs::create_dir_all(path)
71                .map_err(|e| ScribeError::path_with_source(
72                    "Failed to create directory", 
73                    path, 
74                    e
75                ))?;
76        } else if !path.is_dir() {
77            return Err(ScribeError::path("Path exists but is not a directory", path));
78        }
79        Ok(())
80    }
81
82    /// Find the repository root by looking for common markers (.git, Cargo.toml, etc.)
83    pub fn find_repo_root<P: AsRef<Path>>(start_path: P) -> Option<PathBuf> {
84        const REPO_MARKERS: &[&str] = &[
85            ".git",
86            "Cargo.toml",
87            "package.json",
88            "pyproject.toml",
89            "setup.py",
90            "go.mod",
91            "pom.xml",
92            "build.gradle",
93            "Makefile",
94        ];
95
96        let mut current = start_path.as_ref();
97        
98        loop {
99            for marker in REPO_MARKERS {
100                if current.join(marker).exists() {
101                    return Some(current.to_path_buf());
102                }
103            }
104            
105            match current.parent() {
106                Some(parent) => current = parent,
107                None => return None,
108            }
109        }
110    }
111}
112
/// String processing utilities
pub mod string {
    use super::*;

    /// Truncate a string to a maximum length, adding ellipsis if needed.
    ///
    /// Lengths are measured in bytes. A string that already fits is returned
    /// unchanged; otherwise the result is a prefix of `s` followed by `...`,
    /// at most `max_len` bytes long. The cut never splits a multi-byte
    /// character: it backs off to the nearest earlier character boundary.
    ///
    /// When `max_len` is 3 or less and `s` does not fit, the result is just
    /// `"..."` (which may be longer than `max_len`).
    pub fn truncate(s: &str, max_len: usize) -> String {
        if s.len() <= max_len {
            return s.to_string();
        }
        if max_len <= 3 {
            return "...".to_string();
        }

        // Slicing at an arbitrary byte offset panics inside a multi-byte
        // character, so move left until we sit on a boundary (0 always is one).
        let mut cut = max_len - 3;
        while !s.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &s[..cut])
    }

    /// Remove common leading whitespace from all lines.
    ///
    /// Indentation is counted in characters, so multi-byte whitespace is
    /// handled correctly. Whitespace-only lines are ignored when computing the
    /// common indentation and are emitted as empty lines. Lines are joined
    /// with `\n`, so a trailing newline in the input is not preserved.
    pub fn dedent(s: &str) -> String {
        // Find minimum indentation (ignoring blank lines), measured in chars
        // so that it matches the char-based stripping below.
        let min_indent = s
            .lines()
            .filter(|line| !line.trim().is_empty())
            .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
            .min()
            .unwrap_or(0);

        s.lines()
            .map(|line| {
                if line.trim().is_empty() {
                    String::new()
                } else {
                    line.chars().skip(min_indent).collect()
                }
            })
            .collect::<Vec<String>>()
            .join("\n")
    }

    /// Count lines in a string.
    ///
    /// An empty string has 0 lines; otherwise the result is the number of
    /// `\n` characters plus one, so a trailing newline counts as starting one
    /// more (empty) line.
    pub fn count_lines(s: &str) -> usize {
        if s.is_empty() {
            0
        } else {
            s.matches('\n').count() + 1
        }
    }

    /// Check if string is likely binary content.
    ///
    /// A string is considered binary when it contains a NUL character, or when
    /// more than 30% of its characters are suspicious: control characters
    /// other than ASCII whitespace, or U+FFFD replacement characters (the
    /// usual residue of lossy decoding). Printable non-ASCII text is not
    /// counted against the string.
    pub fn is_likely_binary(s: &str) -> bool {
        let mut total_chars = 0usize;
        let mut suspicious = 0usize;

        for c in s.chars() {
            if c == '\0' {
                return true;
            }
            total_chars += 1;
            if c == '\u{FFFD}' || (c.is_control() && !c.is_ascii_whitespace()) {
                suspicious += 1;
            }
        }

        if total_chars == 0 {
            return false;
        }

        // If more than 30% suspicious characters, likely binary
        (suspicious as f64 / total_chars as f64) > 0.3
    }

    /// Extract identifier/name from a file path.
    ///
    /// Returns the file stem of the final component, or `"unknown"` when the
    /// path has no file stem or the stem is not valid UTF-8.
    pub fn extract_identifier<P: AsRef<Path>>(path: P) -> String {
        path.as_ref()
            .file_stem()
            .and_then(|stem| stem.to_str())
            .unwrap_or("unknown")
            .to_string()
    }

    /// Convert snake_case to camelCase.
    ///
    /// Every underscore is dropped and the character following it is
    /// upper-cased (using its full upper-case mapping).
    pub fn snake_to_camel(s: &str) -> String {
        let mut result = String::with_capacity(s.len());
        let mut capitalize_next = false;

        for c in s.chars() {
            if c == '_' {
                capitalize_next = true;
            } else if capitalize_next {
                // Upper-casing can yield more than one char (e.g. `ß` -> `SS`).
                result.extend(c.to_uppercase());
                capitalize_next = false;
            } else {
                result.push(c);
            }
        }

        result
    }

    /// Convert camelCase to snake_case.
    ///
    /// An underscore is inserted before every upper-case character except the
    /// first character of the string, and every character is lower-cased
    /// (using its full lower-case mapping).
    pub fn camel_to_snake(s: &str) -> String {
        let mut result = String::with_capacity(s.len());

        for (i, c) in s.char_indices() {
            if c.is_uppercase() && i > 0 {
                result.push('_');
            }
            result.extend(c.to_lowercase());
        }

        result
    }
}
234
/// Time and duration utilities
pub mod time {
    use super::*;

    /// Render a `Duration` as a short human-readable string.
    ///
    /// The format depends on the magnitude:
    /// - under one second: milliseconds only (`500ms`)
    /// - under one minute: seconds with millisecond fraction (`5.000s`)
    /// - under one hour: minutes and seconds (`1m 5s`)
    /// - otherwise: hours, minutes and seconds (`1h 1m 1s`)
    pub fn duration_to_human(duration: Duration) -> String {
        const SECS_PER_MINUTE: u64 = 60;
        const SECS_PER_HOUR: u64 = 3600;

        let whole_secs = duration.as_secs();
        let sub_millis = duration.subsec_millis();

        match whole_secs {
            0 => format!("{}ms", sub_millis),
            s if s < SECS_PER_MINUTE => format!("{}.{:03}s", s, sub_millis),
            s if s < SECS_PER_HOUR => {
                format!("{}m {}s", s / SECS_PER_MINUTE, s % SECS_PER_MINUTE)
            }
            s => format!(
                "{}h {}m {}s",
                s / SECS_PER_HOUR,
                (s % SECS_PER_HOUR) / SECS_PER_MINUTE,
                s % SECS_PER_MINUTE
            ),
        }
    }

    /// Current wall-clock time as whole seconds since the Unix epoch.
    ///
    /// Yields 0 if the system clock reports a time before the epoch.
    pub fn current_timestamp() -> u64 {
        match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(since_epoch) => since_epoch.as_secs(),
            Err(_) => 0,
        }
    }

    /// Convert a `SystemTime` to whole seconds since the Unix epoch.
    ///
    /// Times earlier than the epoch map to 0.
    pub fn system_time_to_timestamp(time: SystemTime) -> u64 {
        time.duration_since(UNIX_EPOCH)
            .map_or(0, |since_epoch| since_epoch.as_secs())
    }

    /// Convert seconds since the Unix epoch back into a `SystemTime`.
    pub fn timestamp_to_system_time(timestamp: u64) -> SystemTime {
        let since_epoch = Duration::from_secs(timestamp);
        UNIX_EPOCH + since_epoch
    }
}
280
/// Collection utilities
pub mod collections {
    use std::collections::HashMap;
    use std::hash::Hash;

    /// Count occurrences of items in an iterator.
    ///
    /// Accepts anything iterable (an iterator, a `Vec`, an array, ...) and
    /// returns a map from each distinct item to the number of times it was
    /// yielded.
    pub fn count_occurrences<T, I>(iter: I) -> HashMap<T, usize>
    where
        T: Eq + Hash,
        I: IntoIterator<Item = T>,
    {
        let mut counts: HashMap<T, usize> = HashMap::new();
        for item in iter {
            *counts.entry(item).or_insert(0) += 1;
        }
        counts
    }

    /// Find the most common item in an iterator.
    ///
    /// Returns `None` for an empty input. When several items share the highest
    /// count, the one that appeared first in the input wins, so the result is
    /// deterministic (it does not depend on hash-map iteration order).
    pub fn most_common<T, I>(iter: I) -> Option<T>
    where
        T: Eq + Hash + Clone,
        I: IntoIterator<Item = T>,
    {
        // item -> (occurrences, index of first occurrence)
        let mut tally: HashMap<T, (usize, usize)> = HashMap::new();
        for (index, item) in iter.into_iter().enumerate() {
            tally.entry(item).or_insert((0, index)).0 += 1;
        }

        tally
            .into_iter()
            // Highest count first; among equal counts prefer the smallest
            // first-seen index. The key is unique per entry, so the maximum is
            // unambiguous.
            .max_by_key(|(_, (count, first_seen))| (*count, std::cmp::Reverse(*first_seen)))
            .map(|(item, _)| item)
    }

    /// Group items by a key function.
    ///
    /// Each item is placed in the bucket for `key_fn(&item)`. Items within a
    /// bucket keep their original relative order.
    pub fn group_by<T, K, F>(items: Vec<T>, key_fn: F) -> HashMap<K, Vec<T>>
    where
        K: Eq + Hash,
        F: Fn(&T) -> K,
    {
        let mut groups: HashMap<K, Vec<T>> = HashMap::new();
        for item in items {
            let key = key_fn(&item);
            groups.entry(key).or_default().push(item);
        }
        groups
    }
}
326
/// Math and statistics utilities
pub mod math {
    /// Calculate the arithmetic mean of a slice of numbers.
    ///
    /// Returns 0.0 for an empty slice.
    pub fn mean(values: &[f64]) -> f64 {
        if values.is_empty() {
            0.0
        } else {
            values.iter().sum::<f64>() / values.len() as f64
        }
    }

    /// Calculate the median of a slice of numbers.
    ///
    /// The slice is sorted in place as a side effect. Returns 0.0 for an empty
    /// slice; for an even number of elements the two middle values are
    /// averaged. NaN values are ordered after every number, so the result is
    /// deterministic even when NaNs are present.
    pub fn median(values: &mut [f64]) -> f64 {
        if values.is_empty() {
            return 0.0;
        }

        // The comparator must be a total order. Treating "NaN vs number" as
        // Equal is not one (NaN == 1 and NaN == 2, yet 1 < 2), which leaves
        // the sort result unspecified. Order NaNs last instead.
        values.sort_by(|a, b| {
            a.is_nan()
                .cmp(&b.is_nan())
                .then_with(|| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
        });
        let mid = values.len() / 2;

        if values.len() % 2 == 0 {
            (values[mid - 1] + values[mid]) / 2.0
        } else {
            values[mid]
        }
    }

    /// Calculate the population standard deviation.
    ///
    /// The variance is the mean squared deviation divided by the number of
    /// values (not `n - 1`). Returns 0.0 when fewer than two values are given.
    pub fn std_deviation(values: &[f64]) -> f64 {
        if values.len() < 2 {
            return 0.0;
        }

        let mean_val = mean(values);
        let variance = values
            .iter()
            .map(|x| {
                let diff = x - mean_val;
                diff * diff
            })
            .sum::<f64>()
            / values.len() as f64;

        variance.sqrt()
    }

    /// Normalize values to the 0-1 range in place (min-max scaling).
    ///
    /// An empty slice is left untouched. When all values are equal, every
    /// element is set to 0.0.
    pub fn normalize(values: &mut [f64]) {
        if values.is_empty() {
            return;
        }

        let min_val = values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        let max_val = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
        let range = max_val - min_val;

        if range == 0.0 {
            // All values are the same
            values.iter_mut().for_each(|x| *x = 0.0);
        } else {
            values.iter_mut().for_each(|x| *x = (*x - min_val) / range);
        }
    }

    /// Clamp a value between min and max.
    ///
    /// The lower bound is applied first, then the upper bound.
    pub fn clamp(value: f64, min: f64, max: f64) -> f64 {
        value.max(min).min(max)
    }
}
395
396/// Validation utilities
397pub mod validation {
398    use super::*;
399
400    /// Validate that a path exists and is readable
401    pub fn validate_readable_path<P: AsRef<Path>>(path: P) -> Result<()> {
402        let path = path.as_ref();
403        if !path.exists() {
404            return Err(ScribeError::path("Path does not exist", path));
405        }
406
407        // Try to read metadata to check if readable
408        std::fs::metadata(path)
409            .map_err(|e| ScribeError::path_with_source("Path is not readable", path, e))?;
410
411        Ok(())
412    }
413
414    /// Validate that a path is a directory
415    pub fn validate_directory<P: AsRef<Path>>(path: P) -> Result<()> {
416        let path = path.as_ref();
417        validate_readable_path(path)?;
418        
419        if !path.is_dir() {
420            return Err(ScribeError::path("Path is not a directory", path));
421        }
422
423        Ok(())
424    }
425
426    /// Validate that a path is a file
427    pub fn validate_file<P: AsRef<Path>>(path: P) -> Result<()> {
428        let path = path.as_ref();
429        validate_readable_path(path)?;
430        
431        if !path.is_file() {
432            return Err(ScribeError::path("Path is not a file", path));
433        }
434
435        Ok(())
436    }
437
438    /// Validate configuration values
439    pub fn validate_config_value<T>(
440        value: T,
441        min: T,
442        max: T,
443        field_name: &str,
444    ) -> Result<T>
445    where
446        T: PartialOrd + std::fmt::Display + Copy,
447    {
448        if value < min || value > max {
449            return Err(ScribeError::config_field(
450                format!("{} must be between {} and {}", field_name, min, max),
451                field_name,
452            ));
453        }
454        Ok(value)
455    }
456}
457
/// Hash generation utilities
pub mod hash {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    /// Render the hasher's current 64-bit state as lowercase hexadecimal.
    fn hex_digest(state: &DefaultHasher) -> String {
        format!("{:x}", state.finish())
    }

    /// Hash any `Hash` value and return the digest as a lowercase hex string.
    pub fn generate_hash<T: Hash>(value: &T) -> String {
        let mut state = DefaultHasher::new();
        value.hash(&mut state);
        hex_digest(&state)
    }

    /// Hash the textual contents of a file.
    ///
    /// Delegates to [`generate_hash`] on the string slice.
    pub fn hash_file_content(content: &str) -> String {
        generate_hash::<&str>(&content)
    }

    /// Hash a sequence of values into one digest.
    ///
    /// Every element is fed, in order, into the same hasher.
    pub fn hash_multiple<T: Hash>(values: &[T]) -> String {
        let mut state = DefaultHasher::new();
        values.iter().for_each(|value| value.hash(&mut state));
        hex_digest(&state)
    }
}
484
#[cfg(test)]
mod tests {
    use super::*;

    /// Backslash separators are rewritten to forward slashes.
    #[test]
    fn test_path_normalize() {
        assert_eq!(path::normalize_path(r"src\lib\mod.rs"), "src/lib/mod.rs");
    }

    /// Depth equals the number of path components.
    #[test]
    fn test_path_depth() {
        let cases = [
            ("file.txt", 1),
            ("src/lib.rs", 2),
            ("src/nested/deep/file.rs", 4),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(path::path_depth(input), *expected);
        }
    }

    /// Hidden detection looks only at the final path component.
    #[test]
    fn test_is_hidden() {
        // `.cargo` on its own is a hidden directory.
        let hidden = [".gitignore", ".cargo"];
        // `.cargo/config` ends in `config`, which is not hidden.
        let visible = ["src/lib.rs", "README.md", ".cargo/config"];

        for name in hidden.iter() {
            assert!(path::is_hidden(name));
        }
        for name in visible.iter() {
            assert!(!path::is_hidden(name));
        }
    }

    /// Strings that fit are untouched; longer ones end in an ellipsis.
    #[test]
    fn test_string_truncate() {
        let cases = [
            ("hello", 10, "hello"),
            ("hello world", 8, "hello..."),
            ("hi", 2, "hi"),
            ("hello", 3, "..."),
        ];
        for (input, limit, expected) in cases.iter() {
            assert_eq!(string::truncate(input, *limit), *expected);
        }
    }

    /// The common indentation is removed; extra indentation is kept.
    #[test]
    fn test_string_dedent() {
        let dedented = string::dedent("    line 1\n    line 2\n        line 3");
        assert_eq!(dedented, "line 1\nline 2\n    line 3");
    }

    /// Line counting, including the trailing-newline case.
    #[test]
    fn test_count_lines() {
        let cases = [
            ("", 0),
            ("single line", 1),
            ("line 1\nline 2", 2),
            ("line 1\nline 2\n", 3),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(string::count_lines(input), *expected);
        }
    }

    /// Conversions between snake_case and camelCase.
    #[test]
    fn test_case_conversion() {
        let to_camel = [
            ("hello_world", "helloWorld"),
            ("test_case_name", "testCaseName"),
        ];
        for (snake, camel) in to_camel.iter() {
            assert_eq!(string::snake_to_camel(snake), *camel);
        }

        let to_snake = [
            ("helloWorld", "hello_world"),
            ("TestCaseName", "test_case_name"),
        ];
        for (camel, snake) in to_snake.iter() {
            assert_eq!(string::camel_to_snake(camel), *snake);
        }
    }

    /// Plain text is not binary; NUL bytes and control-heavy input are.
    #[test]
    fn test_binary_detection() {
        for text in ["Hello world", "let x = 42;\nfn main() {}"].iter() {
            assert!(!string::is_likely_binary(text));
        }
        assert!(string::is_likely_binary("Hello\x00world"));

        // Test high proportion of non-printable characters:
        // every third character is printable, the rest are control characters.
        let mut mostly_non_printable = String::new();
        for i in 0..100 {
            mostly_non_printable.push(if i % 3 == 0 { 'a' } else { '\x01' });
        }
        assert!(string::is_likely_binary(&mostly_non_printable));
    }

    /// One sample per formatting tier of `duration_to_human`.
    #[test]
    fn test_duration_formatting() {
        let cases = [
            (Duration::from_millis(500), "500ms"),
            (Duration::from_secs(5), "5.000s"),
            (Duration::from_secs(65), "1m 5s"),
            (Duration::from_secs(3661), "1h 1m 1s"),
        ];
        for (duration, expected) in cases.iter() {
            assert_eq!(time::duration_to_human(*duration), *expected);
        }
    }

    /// Mean, median, standard deviation and clamp on small inputs.
    #[test]
    fn test_math_functions() {
        let samples = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(math::mean(&samples), 3.0);

        let mut sortable = samples.clone();
        assert_eq!(math::median(&mut sortable), 3.0);

        // Standard deviation of [1,2,3,4,5] is sqrt(2) ≈ 1.4142
        let deviation = math::std_deviation(&samples);
        assert!((deviation - 1.4142135623730951).abs() < 1e-10);

        let clamp_cases = [(5.0, 4.0), (1.0, 2.0), (3.0, 3.0)];
        for (input, expected) in clamp_cases.iter() {
            assert_eq!(math::clamp(*input, 2.0, 4.0), *expected);
        }
    }

    /// Occurrence counting and the most-common lookup.
    #[test]
    fn test_collections_utilities() {
        let letters = vec!['a', 'b', 'a', 'c', 'a'];

        let tally = collections::count_occurrences(letters.iter().cloned());
        for (letter, expected) in [('a', 3), ('b', 1), ('c', 1)].iter() {
            assert_eq!(tally[letter], *expected);
        }

        let winner = collections::most_common(letters.iter().cloned());
        assert_eq!(winner, Some('a'));
    }

    /// Equal inputs hash equally; different inputs hash differently.
    #[test]
    fn test_hash_generation() {
        let first = hash::generate_hash(&"test string");
        let repeat = hash::generate_hash(&"test string");
        let other = hash::generate_hash(&"different string");

        assert_eq!(first, repeat);
        assert_ne!(first, other);
    }
}