directory_indexer/
utils.rs1use sha2::{Digest, Sha256};
2use std::path::{Path, PathBuf};
3
4use crate::error::{IndexerError, Result};
5
6pub fn to_absolute_path<P: AsRef<Path>>(path: P) -> Result<PathBuf> {
8 let path = path.as_ref();
9 if path.is_absolute() {
10 Ok(path.to_path_buf())
11 } else {
12 std::env::current_dir()
13 .map_err(IndexerError::from)
14 .map(|cwd| cwd.join(path))
15 }
16}
17
18pub fn normalize_path<P: AsRef<Path>>(path: P) -> Result<String> {
23 let path = path.as_ref();
24
25 let path_str = path.to_string_lossy();
27 let mut normalized = path_str.replace('\\', "/");
28
29 let is_unix_absolute = normalized.starts_with('/');
32
33 if !path.is_absolute() && !is_unix_absolute {
35 let abs_path = to_absolute_path(path)?;
36 normalized = abs_path.to_string_lossy().replace('\\', "/");
37 }
38
39 #[cfg(windows)]
42 {
43 if !is_unix_absolute && normalized.len() >= 2 && normalized.chars().nth(1) == Some(':') {
44 let mut chars: Vec<char> = normalized.chars().collect();
45 chars[0] = chars[0].to_ascii_lowercase();
46 normalized = chars.into_iter().collect();
47 }
48 }
49
50 Ok(normalized)
51}
52
/// Returns the text after the last `/` in `path`.
///
/// The input is treated as a plain `/`-separated string, not as a platform
/// path. Because splitting always yields at least one piece, the result is
/// always `Some`; it is `Some("")` for an empty input or one ending in `/`.
pub fn get_filename_from_path(path: &str) -> Option<String> {
    // Walking the pieces from the right makes the first item the final segment.
    let last_segment = path.rsplit('/').next();
    last_segment.map(String::from)
}
58
59pub fn paths_equal<P1: AsRef<Path>, P2: AsRef<Path>>(path1: P1, path2: P2) -> bool {
61 match (normalize_path(path1), normalize_path(path2)) {
62 (Ok(p1), Ok(p2)) => p1 == p2,
63 _ => false,
64 }
65}
66
67pub fn path_starts_with<P1: AsRef<Path>, P2: AsRef<Path>>(path: P1, prefix: P2) -> bool {
69 match (normalize_path(path), normalize_path(prefix)) {
70 (Ok(p), Ok(pre)) => p.starts_with(&pre),
71 _ => false,
72 }
73}
74
75pub fn get_parent_path<P: AsRef<Path>>(path: P) -> Result<Option<String>> {
77 let abs_path = to_absolute_path(path)?;
78 if let Some(parent) = abs_path.parent() {
79 Ok(Some(normalize_path(parent)?))
80 } else {
81 Ok(None)
82 }
83}
84
85pub fn calculate_file_hash<P: AsRef<Path>>(path: P) -> Result<String> {
87 let content = std::fs::read(path)?;
88 let hash = Sha256::digest(&content);
89 Ok(format!("{hash:x}"))
90}
91
/// Decides whether `path` matches any of the given ignore patterns.
///
/// Each non-empty pattern is tried in three ways; the first hit wins:
///
/// 1. **Substring** – the pattern occurs anywhere in the (lossily converted)
///    path string. This also covers an exact match on the file name.
/// 2. **Hidden files** – the literal pattern `.*` matches any file whose name
///    starts with `.`.
/// 3. **Suffix glob** – a pattern starting with `*` matches when the file name
///    ends with the remainder of the pattern (e.g. `*.log`).
///
/// Empty patterns are skipped: an empty string is a substring of every path
/// and would otherwise cause every file to be ignored.
///
/// Returns `false` when no pattern matches (including an empty pattern list).
pub fn should_ignore_file<P: AsRef<Path>>(path: P, ignore_patterns: &[String]) -> bool {
    let path = path.as_ref();
    let path_str = path.to_string_lossy();
    // Paths without a final component (e.g. ending in `..`) get an empty name.
    let file_name = path
        .file_name()
        .map(|n| n.to_string_lossy())
        .unwrap_or_default();

    ignore_patterns
        .iter()
        .map(String::as_str)
        .filter(|pattern| !pattern.is_empty())
        .any(|pattern| {
            if path_str.contains(pattern) {
                return true;
            }

            if pattern == ".*" && file_name.starts_with('.') {
                return true;
            }

            // `*suffix` glob; `strip_prefix` avoids manual byte slicing.
            match pattern.strip_prefix('*') {
                Some(suffix) => file_name.ends_with(suffix),
                None => false,
            }
        })
}
124
125pub fn detect_file_type<P: AsRef<Path>>(path: P) -> Option<FileType> {
127 let extension = path.as_ref().extension()?.to_str()?.to_lowercase();
128
129 match extension.as_str() {
130 "md" | "txt" | "rst" | "org" => Some(FileType::Text),
131 "rs" | "py" | "js" | "ts" | "go" | "java" | "cpp" | "c" | "h" => Some(FileType::Code),
132 "json" | "yaml" | "yml" | "toml" | "csv" => Some(FileType::Data),
133 "html" | "xml" => Some(FileType::Markup),
134 "env" | "conf" | "ini" | "cfg" => Some(FileType::Config),
135 _ => None,
136 }
137}
138
/// Coarse content category of a file.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so values can be used as
/// `HashMap`/`HashSet` keys and in contexts that require full equality.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FileType {
    /// Plain-text documents.
    Text,
    /// Source code.
    Code,
    /// Structured data files.
    Data,
    /// Markup documents.
    Markup,
    /// Configuration files.
    Config,
}

impl FileType {
    /// Returns the lowercase name of the category (`"text"`, `"code"`,
    /// `"data"`, `"markup"` or `"config"`).
    pub fn as_str(&self) -> &'static str {
        match self {
            FileType::Text => "text",
            FileType::Code => "code",
            FileType::Data => "data",
            FileType::Markup => "markup",
            FileType::Config => "config",
        }
    }
}
159
/// Splits `text` into consecutive chunks of at most `chunk_size` bytes, with
/// each chunk after the first starting `overlap` bytes before the end of the
/// previous one.
///
/// Sizes are measured in bytes, but chunk edges are always moved onto UTF-8
/// character boundaries so slicing never panics:
///
/// * a chunk end that falls inside a character is moved back to the start of
///   that character (the chunk gets shorter);
/// * if `chunk_size` is smaller than the character at the chunk start, that
///   single character is emitted as its own chunk (the only case in which a
///   chunk may exceed `chunk_size`);
/// * an overlap start that falls inside a character is moved back to the start
///   of that character (the overlap gets slightly larger).
///
/// Special cases:
///
/// * If `text` fits in one chunk, or `chunk_size` is `0`, the whole text is
///   returned as a single chunk (so the result is never empty).
/// * If `overlap` is so large that the next chunk would not start after the
///   current one, the overlap is dropped for that step so the function always
///   makes progress and terminates.
pub fn chunk_text(text: &str, chunk_size: usize, overlap: usize) -> Vec<String> {
    if chunk_size == 0 || text.len() <= chunk_size {
        return vec![text.to_string()];
    }

    let mut chunks = Vec::new();
    // Invariant: `start` is always on a char boundary and strictly increases.
    let mut start = 0;

    while start < text.len() {
        let mut end = start.saturating_add(chunk_size).min(text.len());

        // Never cut a multi-byte character in half.
        while end > start && !text.is_char_boundary(end) {
            end -= 1;
        }
        if end == start {
            // `chunk_size` is smaller than the character at `start`; emit that
            // whole character. `text.len()` is a boundary, so this terminates.
            end = start + 1;
            while !text.is_char_boundary(end) {
                end += 1;
            }
        }

        chunks.push(text[start..end].to_string());

        if end == text.len() {
            break;
        }

        // Step back by `overlap`, snapping onto a char boundary (index 0 is
        // always a boundary, so the loop terminates).
        let mut next = end.saturating_sub(overlap);
        while !text.is_char_boundary(next) {
            next -= 1;
        }

        // Guarantee forward progress: if the overlap would not move us past
        // the current chunk's start, continue without overlap instead.
        start = if next > start { next } else { end };
    }

    chunks
}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// A text longer than the chunk size yields at least one chunk, and the
    /// first chunk respects the size limit.
    #[test]
    fn test_chunk_text() {
        let sample = "This is a test text that should be chunked properly.";
        let pieces = chunk_text(sample, 20, 5);

        let first = pieces
            .first()
            .expect("chunking must yield at least one chunk");
        assert!(first.len() <= 20);
    }

    /// Known extensions map to their category; unknown ones map to `None`.
    #[test]
    fn test_file_type_detection() {
        let cases = vec![
            ("test.md", Some(FileType::Text)),
            ("main.rs", Some(FileType::Code)),
            ("data.json", Some(FileType::Data)),
            ("unknown.xyz", None),
        ];

        for (name, expected) in cases {
            assert_eq!(detect_file_type(name), expected);
        }
    }

    /// Patterns match as substrings anywhere in the path.
    #[test]
    fn test_should_ignore_file() {
        let patterns: Vec<String> = [".git", "node_modules"]
            .iter()
            .map(|p| p.to_string())
            .collect();

        for ignored in vec!["path/.git/config", "project/node_modules/package"] {
            assert!(should_ignore_file(ignored, &patterns));
        }
        assert!(!should_ignore_file("src/main.rs", &patterns));
    }

    /// Relative inputs are resolved and always use forward slashes.
    #[test]
    fn test_normalize_path() {
        let dotted = normalize_path("./test.txt").expect("relative path should normalize");
        assert!(dotted.ends_with("/test.txt"));
        assert!(!dotted.contains("\\"));

        let nested = normalize_path("src/main.rs").expect("relative path should normalize");
        assert!(nested.contains("/src/main.rs"));
        assert!(!nested.contains("\\"));
    }

    /// The file name is whatever follows the last `/`, possibly empty.
    #[test]
    fn test_get_filename_from_path() {
        let cases = vec![
            ("/path/to/file.txt", "file.txt"),
            ("file.txt", "file.txt"),
            ("/path/to/", ""),
            ("", ""),
        ];

        for (input, expected) in cases {
            assert_eq!(get_filename_from_path(input), Some(expected.to_string()));
        }
    }

    /// Identical relative paths compare equal.
    #[test]
    fn test_paths_equal() {
        let (left, right) = ("src/main.rs", "src/main.rs");
        assert!(paths_equal(left, right));
    }

    /// Windows drive paths get a lower-case drive letter and forward slashes;
    /// Unix-style rooted paths pass through unchanged.
    #[cfg(windows)]
    #[test]
    fn test_windows_path_normalization() {
        let drive = normalize_path("C:\\Users\\test\\file.txt")
            .expect("drive path should normalize");
        assert!(drive.starts_with("c:/"));
        assert!(!drive.contains("\\"));

        let unix_style =
            normalize_path("/home/user/documents").expect("rooted path should normalize");
        assert_eq!(unix_style, "/home/user/documents");
    }
}