Skip to main content

directory_indexer/
utils.rs

1use sha2::{Digest, Sha256};
2use std::path::{Path, PathBuf};
3
4use crate::error::{IndexerError, Result};
5
6/// Convert a path to an absolute path
7pub fn to_absolute_path<P: AsRef<Path>>(path: P) -> Result<PathBuf> {
8    let path = path.as_ref();
9    if path.is_absolute() {
10        Ok(path.to_path_buf())
11    } else {
12        std::env::current_dir()
13            .map_err(IndexerError::from)
14            .map(|cwd| cwd.join(path))
15    }
16}
17
18/// Normalize a path for consistent storage and comparison across platforms
19/// - Uses forward slashes as separators for storage
20/// - Handles case normalization on Windows drive letters
21/// - Only converts to absolute if the path is actually relative
22pub fn normalize_path<P: AsRef<Path>>(path: P) -> Result<String> {
23    let path = path.as_ref();
24
25    // Convert to string and replace backslashes with forward slashes
26    let path_str = path.to_string_lossy();
27    let mut normalized = path_str.replace('\\', "/");
28
29    // Check if this is a Unix-style absolute path (starts with /)
30    // These should be preserved as-is, especially important for tests
31    let is_unix_absolute = normalized.starts_with('/');
32
33    // If it's a relative path (and not a Unix-style absolute path), make it absolute
34    if !path.is_absolute() && !is_unix_absolute {
35        let abs_path = to_absolute_path(path)?;
36        normalized = abs_path.to_string_lossy().replace('\\', "/");
37    }
38
39    // On Windows, normalize drive letters to lowercase if present
40    // but only for actual Windows paths, not Unix-style paths
41    #[cfg(windows)]
42    {
43        if !is_unix_absolute && normalized.len() >= 2 && normalized.chars().nth(1) == Some(':') {
44            let mut chars: Vec<char> = normalized.chars().collect();
45            chars[0] = chars[0].to_ascii_lowercase();
46            normalized = chars.into_iter().collect();
47        }
48    }
49
50    Ok(normalized)
51}
52
/// Return the text after the last `/` of an already-normalized path.
///
/// The input is expected to use forward slashes only. The result is always
/// `Some`: a path without any `/` is returned whole, and a path ending in `/`
/// (or the empty string) yields an empty file name.
pub fn get_filename_from_path(path: &str) -> Option<String> {
    let tail = match path.rfind('/') {
        Some(slash) => &path[slash + 1..],
        None => path,
    };
    Some(tail.to_owned())
}
58
59/// Compare two paths in a platform-agnostic way
60pub fn paths_equal<P1: AsRef<Path>, P2: AsRef<Path>>(path1: P1, path2: P2) -> bool {
61    match (normalize_path(path1), normalize_path(path2)) {
62        (Ok(p1), Ok(p2)) => p1 == p2,
63        _ => false,
64    }
65}
66
67/// Check if a path starts with another path (useful for checking if file is in directory)
68pub fn path_starts_with<P1: AsRef<Path>, P2: AsRef<Path>>(path: P1, prefix: P2) -> bool {
69    match (normalize_path(path), normalize_path(prefix)) {
70        (Ok(p), Ok(pre)) => p.starts_with(&pre),
71        _ => false,
72    }
73}
74
75/// Get the parent directory path in normalized form
76pub fn get_parent_path<P: AsRef<Path>>(path: P) -> Result<Option<String>> {
77    let abs_path = to_absolute_path(path)?;
78    if let Some(parent) = abs_path.parent() {
79        Ok(Some(normalize_path(parent)?))
80    } else {
81        Ok(None)
82    }
83}
84
85/// Calculate SHA256 hash of file content
86pub fn calculate_file_hash<P: AsRef<Path>>(path: P) -> Result<String> {
87    let content = std::fs::read(path)?;
88    let hash = Sha256::digest(&content);
89    Ok(format!("{hash:x}"))
90}
91
/// Decide whether `path` matches any of the given ignore patterns.
///
/// Each non-empty pattern is tried in four ways; the first hit wins:
///
/// 1. the pattern occurs anywhere in the full path (plain substring match,
///    so `.git` matches `repo/.git/config`),
/// 2. the pattern is exactly `.*` and the file name starts with a dot,
/// 3. the pattern starts with `*` and the file name ends with the rest of
///    the pattern (e.g. `*~`, `*.log`),
/// 4. the pattern equals the file name.
///
/// Empty patterns are skipped: an empty string is a substring of every path
/// and would otherwise cause every file to be ignored.
///
/// Returns `false` when no pattern matches, including when `ignore_patterns`
/// is empty.
pub fn should_ignore_file<P: AsRef<Path>>(path: P, ignore_patterns: &[String]) -> bool {
    let path = path.as_ref();
    let path_str = path.to_string_lossy();
    let file_name = path
        .file_name()
        .map(|n| n.to_string_lossy())
        .unwrap_or_default();

    ignore_patterns
        .iter()
        .map(String::as_str)
        .filter(|pattern| !pattern.is_empty())
        .any(|pattern| {
            path_str.contains(pattern)
                || (pattern == ".*" && file_name.starts_with('.'))
                || pattern
                    .strip_prefix('*')
                    .map_or(false, |suffix| file_name.ends_with(suffix))
                || file_name == pattern
        })
}
124
125/// Detect file type based on extension
126pub fn detect_file_type<P: AsRef<Path>>(path: P) -> Option<FileType> {
127    let extension = path.as_ref().extension()?.to_str()?.to_lowercase();
128
129    match extension.as_str() {
130        "md" | "txt" | "rst" | "org" => Some(FileType::Text),
131        "rs" | "py" | "js" | "ts" | "go" | "java" | "cpp" | "c" | "h" => Some(FileType::Code),
132        "json" | "yaml" | "yml" | "toml" | "csv" => Some(FileType::Data),
133        "html" | "xml" => Some(FileType::Markup),
134        "env" | "conf" | "ini" | "cfg" => Some(FileType::Config),
135        _ => None,
136    }
137}
138
/// Broad category assigned to a file based on its extension.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so the category can be
/// used as a `HashMap`/`HashSet` key (for example for per-type grouping).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FileType {
    /// Prose and plain-text documents (e.g. `md`, `txt`).
    Text,
    /// Program source code (e.g. `rs`, `py`, `js`).
    Code,
    /// Structured data files (e.g. `json`, `yaml`, `csv`).
    Data,
    /// Markup documents (`html`, `xml`).
    Markup,
    /// Configuration files (e.g. `ini`, `conf`).
    Config,
}

impl FileType {
    /// Return the lowercase string name of this category.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Text => "text",
            Self::Code => "code",
            Self::Data => "data",
            Self::Markup => "markup",
            Self::Config => "config",
        }
    }
}
159
/// Split `text` into consecutive chunks, optionally overlapping.
///
/// * `chunk_size` – maximum chunk length in **bytes**. Cut points are moved
///   back to the nearest UTF-8 character boundary, so a chunk may be slightly
///   shorter. Every chunk holds at least one whole character, which means a
///   chunk can exceed `chunk_size` only when a single character is wider than
///   `chunk_size` (including `chunk_size == 0`).
/// * `overlap` – number of bytes from the end of a chunk that are repeated at
///   the start of the next one (rounded up to a character boundary). If the
///   overlap would prevent forward progress (e.g. `overlap >= chunk_size`),
///   the next chunk starts where the previous one ended, without overlap.
///
/// Text that already fits in `chunk_size` bytes (including the empty string)
/// is returned as a single chunk.
pub fn chunk_text(text: &str, chunk_size: usize, overlap: usize) -> Vec<String> {
    let len = text.len();
    if len <= chunk_size {
        return vec![text.to_string()];
    }

    let mut chunks = Vec::new();
    let mut start = 0;

    // Invariant: `start` is always a char boundary and strictly increases,
    // so the loop terminates and slicing never panics.
    while start < len {
        let mut end = start.saturating_add(chunk_size).min(len);
        // Never cut through a multi-byte character.
        while !text.is_char_boundary(end) {
            end -= 1;
        }
        if end <= start {
            // `chunk_size` is smaller than the next character: take that
            // character whole rather than emitting an empty chunk forever.
            end = start + 1;
            while !text.is_char_boundary(end) {
                end += 1;
            }
        }

        chunks.push(text[start..end].to_string());

        if end == len {
            break;
        }

        // Step back by `overlap` bytes for the next chunk, snapped to a char
        // boundary; fall back to `end` if that would not move past `start`.
        let mut next = end.saturating_sub(overlap);
        while !text.is_char_boundary(next) {
            next -= 1;
        }
        start = if next > start { next } else { end };
    }

    chunks
}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// Chunking a long sentence yields at least one chunk within the size limit.
    #[test]
    fn test_chunk_text() {
        let chunks = chunk_text(
            "This is a test text that should be chunked properly.",
            20,
            5,
        );

        let first = chunks
            .first()
            .expect("chunking must yield at least one chunk");
        assert!(first.len() <= 20);
    }

    /// Known extensions map to their category; unknown ones map to `None`.
    #[test]
    fn test_file_type_detection() {
        let cases = [
            ("test.md", Some(FileType::Text)),
            ("main.rs", Some(FileType::Code)),
            ("data.json", Some(FileType::Data)),
            ("unknown.xyz", None),
        ];

        for (file, expected) in cases {
            assert_eq!(detect_file_type(file), expected);
        }
    }

    /// Paths containing an ignored directory name are ignored; others are not.
    #[test]
    fn test_should_ignore_file() {
        let patterns: Vec<String> = [".git", "node_modules"]
            .iter()
            .map(|p| p.to_string())
            .collect();

        for ignored in ["path/.git/config", "project/node_modules/package"] {
            assert!(should_ignore_file(ignored, &patterns));
        }
        assert!(!should_ignore_file("src/main.rs", &patterns));
    }

    /// Relative paths become absolute and only ever contain forward slashes.
    #[test]
    fn test_normalize_path() {
        // A `./`-prefixed relative path keeps its file name at the end.
        let dotted = normalize_path("./test.txt").expect("relative path should normalize");
        assert!(dotted.ends_with("/test.txt"));
        assert!(!dotted.contains("\\"));

        // A nested relative path keeps its components, joined by `/`.
        let nested = normalize_path("src/main.rs").expect("relative path should normalize");
        assert!(nested.contains("/src/main.rs"));
        assert!(!nested.contains("\\"));
    }

    /// The file name is whatever follows the last `/`, possibly empty.
    #[test]
    fn test_get_filename_from_path() {
        let cases = [
            ("/path/to/file.txt", "file.txt"),
            ("file.txt", "file.txt"),
            ("/path/to/", ""),
            ("", ""),
        ];

        for (input, expected) in cases {
            assert_eq!(get_filename_from_path(input).as_deref(), Some(expected));
        }
    }

    /// Two identical relative paths normalize to the same string.
    #[test]
    fn test_paths_equal() {
        let first = "src/main.rs";
        let second = "src/main.rs";
        assert!(paths_equal(first, second));
    }

    /// Windows-only: drive letters are lowercased, Unix-style paths are kept.
    #[cfg(windows)]
    #[test]
    fn test_windows_path_normalization() {
        // Drive letter is lowercased and separators become forward slashes.
        let drive = normalize_path("C:\\Users\\test\\file.txt")
            .expect("absolute Windows path should normalize");
        assert!(drive.starts_with("c:/"));
        assert!(!drive.contains("\\"));

        // Unix-style absolute paths pass through unchanged.
        let unix = normalize_path("/home/user/documents")
            .expect("Unix-style path should normalize");
        assert_eq!(unix, "/home/user/documents");
    }
}