tree_parser/utils.rs
1//! Utility functions for the tree parser library
2
3use crate::Language;
4
5/// Check if a file extension is supported by the parser
6///
7/// This function determines whether the tree parser library can handle
8/// files with the given extension by checking against the list of
9/// supported programming languages.
10///
11/// # Arguments
12///
13/// * `extension` - The file extension to check (without the dot)
14///
15/// # Returns
16///
17/// `true` if the extension is supported, `false` otherwise.
18///
19/// # Examples
20///
21/// ```rust
22/// use tree_parser::is_supported_extension;
23///
24/// assert!(is_supported_extension("py"));
25/// assert!(is_supported_extension("rs"));
26/// assert!(is_supported_extension("js"));
27/// assert!(!is_supported_extension("xyz"));
28/// ```
29pub fn is_supported_extension(extension: &str) -> bool {
30 crate::languages::detect_language_by_extension(&format!("file.{}", extension)).is_some()
31}
32
/// List every file extension the parser is declared to support.
///
/// The list is a fixed table compiled into the library; each call returns a
/// freshly allocated copy in the same order. Entries are case-sensitive as
/// written (both `"r"` and `"R"` are present).
///
/// # Returns
///
/// A vector of owned strings, one per supported extension, without the
/// leading dot.
///
/// # Examples
///
/// ```rust
/// use tree_parser::get_supported_extensions;
///
/// let extensions = get_supported_extensions();
/// println!("Supported extensions: {:?}", extensions);
///
/// // Check if a specific extension is in the list
/// assert!(extensions.contains(&"py".to_string()));
/// assert!(extensions.contains(&"rs".to_string()));
/// ```
pub fn get_supported_extensions() -> Vec<String> {
    // Static table; converted to owned strings on demand to keep the
    // `Vec<String>` return type callers rely on.
    const EXTENSIONS: &[&str] = &[
        "py", "pyw", "pyi",
        "rs",
        "js", "mjs", "cjs",
        "ts", "mts", "cts",
        "java",
        "c", "h",
        "cpp", "cc", "cxx", "c++",
        "hpp", "hh", "hxx", "h++",
        "go",
        "cs",
        "php", "phtml", "php3", "php4", "php5", "phps",
        "rb", "rbw",
        "swift",
        "kt", "kts",
        "scala", "sc",
        "hs", "lhs",
        "lua",
        "pl", "pm", "t", "pod",
        "r", "R",
        "sh", "bash", "zsh", "fish",
        "ps1", "psm1", "psd1",
        "html", "htm", "xhtml",
        "css",
        "sql",
        "json",
        "yaml", "yml",
        "toml",
        "xml", "xsd", "xsl", "xslt",
    ];

    EXTENSIONS.iter().map(|ext| ext.to_string()).collect()
}
88
89/// Convert a string representation to a Language enum
90///
91/// This function parses various string representations of programming
92/// language names and returns the corresponding `Language` enum value.
93/// The parsing is case-insensitive and supports multiple aliases for
94/// each language.
95///
96/// # Arguments
97///
98/// * `lang_str` - String representation of the language name
99///
100/// # Returns
101///
102/// `Some(Language)` if the string is recognized, `None` otherwise.
103///
104/// # Supported Aliases
105///
106/// - **Python**: "python", "py"
107/// - **Rust**: "rust", "rs"
108/// - **JavaScript**: "javascript", "js"
109/// - **TypeScript**: "typescript", "ts"
110/// - **C++**: "cpp", "c++", "cxx"
111/// - **C#**: "csharp", "c#", "cs"
112/// - And many more...
113///
114/// # Examples
115///
116/// ```rust
117/// use tree_parser::{language_from_string, Language};
118///
119/// assert_eq!(language_from_string("python"), Some(Language::Python));
120/// assert_eq!(language_from_string("RUST"), Some(Language::Rust));
121/// assert_eq!(language_from_string("js"), Some(Language::JavaScript));
122/// assert_eq!(language_from_string("unknown"), None);
123/// ```
124pub fn language_from_string(lang_str: &str) -> Option<Language> {
125 match lang_str.to_lowercase().as_str() {
126 "python" | "py" => Some(Language::Python),
127 "rust" | "rs" => Some(Language::Rust),
128 "javascript" | "js" => Some(Language::JavaScript),
129 "typescript" | "ts" => Some(Language::TypeScript),
130 "java" => Some(Language::Java),
131 "c" => Some(Language::C),
132 "cpp" | "c++" | "cxx" => Some(Language::Cpp),
133 "go" | "golang" => Some(Language::Go),
134 "csharp" | "c#" | "cs" => Some(Language::CSharp),
135 "php" => Some(Language::Php),
136 "ruby" | "rb" => Some(Language::Ruby),
137 "swift" => Some(Language::Swift),
138 "kotlin" | "kt" => Some(Language::Kotlin),
139 "scala" => Some(Language::Scala),
140 "haskell" | "hs" => Some(Language::Haskell),
141 "lua" => Some(Language::Lua),
142 "perl" | "pl" => Some(Language::Perl),
143 "r" => Some(Language::R),
144 "bash" | "sh" => Some(Language::Bash),
145 "powershell" | "ps1" => Some(Language::PowerShell),
146 "html" => Some(Language::Html),
147 "css" => Some(Language::Css),
148 "sql" => Some(Language::Sql),
149 "json" => Some(Language::Json),
150 "yaml" | "yml" => Some(Language::Yaml),
151 "toml" => Some(Language::Toml),
152 "xml" => Some(Language::Xml),
153 _ => None,
154 }
155}
156
157/// Convert a Language enum to its string representation
158///
159/// This function converts a `Language` enum value to a human-readable
160/// string representation. The returned strings use proper capitalization
161/// and are suitable for display purposes.
162///
163/// # Arguments
164///
165/// * `language` - The Language enum value to convert
166///
167/// # Returns
168///
169/// A string representation of the language name.
170///
171/// # Examples
172///
173/// ```rust
174/// use tree_parser::{language_to_string, Language};
175///
176/// assert_eq!(language_to_string(&Language::Python), "Python");
177/// assert_eq!(language_to_string(&Language::Rust), "Rust");
178/// assert_eq!(language_to_string(&Language::JavaScript), "JavaScript");
179/// assert_eq!(language_to_string(&Language::Cpp), "C++");
180/// ```
181pub fn language_to_string(language: &Language) -> String {
182 match language {
183 Language::Python => "Python".to_string(),
184 Language::Rust => "Rust".to_string(),
185 Language::JavaScript => "JavaScript".to_string(),
186 Language::TypeScript => "TypeScript".to_string(),
187 Language::Java => "Java".to_string(),
188 Language::C => "C".to_string(),
189 Language::Cpp => "C++".to_string(),
190 Language::Go => "Go".to_string(),
191 Language::CSharp => "C#".to_string(),
192 Language::Php => "PHP".to_string(),
193 Language::Ruby => "Ruby".to_string(),
194 Language::Swift => "Swift".to_string(),
195 Language::Kotlin => "Kotlin".to_string(),
196 Language::Scala => "Scala".to_string(),
197 Language::Haskell => "Haskell".to_string(),
198 Language::Lua => "Lua".to_string(),
199 Language::Perl => "Perl".to_string(),
200 Language::R => "R".to_string(),
201 Language::Bash => "Bash".to_string(),
202 Language::PowerShell => "PowerShell".to_string(),
203 Language::Html => "HTML".to_string(),
204 Language::Css => "CSS".to_string(),
205 Language::Sql => "SQL".to_string(),
206 Language::Json => "JSON".to_string(),
207 Language::Yaml => "YAML".to_string(),
208 Language::Toml => "TOML".to_string(),
209 Language::Xml => "XML".to_string(),
210 }
211}
212
/// Format a file size in human-readable form.
///
/// Sizes below 1024 bytes are printed as an exact integer with the `B`
/// unit. Larger sizes are divided by 1024 until they fit the largest
/// applicable unit (KB, MB, GB, TB) and printed with two decimals.
///
/// Because the value is rounded to two decimals for display, a size just
/// under a unit boundary (e.g. 1048575 bytes = 1023.999 KB) would round to
/// `1024.00`. Such values are promoted to the next unit so the output reads
/// `1.00 MB` rather than `1024.00 KB`. TB is the largest unit, so values of
/// 1024 TB and above are still printed in TB.
///
/// # Arguments
///
/// * `bytes` - File size in bytes
///
/// # Returns
///
/// A formatted string with the size and appropriate unit.
///
/// # Examples
///
/// ```rust
/// use tree_parser::format_file_size;
///
/// assert_eq!(format_file_size(512), "512 B");
/// assert_eq!(format_file_size(1024), "1.00 KB");
/// assert_eq!(format_file_size(1536), "1.50 KB");
/// assert_eq!(format_file_size(1048576), "1.00 MB");
/// assert_eq!(format_file_size(1048575), "1.00 MB");
/// ```
pub fn format_file_size(bytes: usize) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;
    let largest_unit = UNITS.len() - 1;

    let mut size = bytes as f64;
    let mut unit_index = 0;
    while size >= STEP && unit_index < largest_unit {
        size /= STEP;
        unit_index += 1;
    }

    // Plain byte counts are shown exactly, without decimals.
    if unit_index == 0 {
        return format!("{} {}", bytes, UNITS[unit_index]);
    }

    // `{:.2}` rounds, so anything from ~1023.995 up would display as
    // "1024.00". Move such values to the next unit (when one exists).
    if unit_index < largest_unit && (size * 100.0).round() >= STEP * 100.0 {
        size /= STEP;
        unit_index += 1;
    }

    format!("{:.2} {}", size, UNITS[unit_index])
}
252
/// Format a duration in human-readable form.
///
/// * Below one second the value is printed as whole milliseconds (`500ms`).
/// * Below one minute it is printed as seconds with two decimals (`1.50s`).
/// * Otherwise it is printed as whole minutes plus seconds with two
///   decimals (`1m 5.00s`).
///
/// The seconds part is rounded to two decimals for display. When that
/// rounding reaches a full 60 seconds (for example 59_999 ms or 119_999 ms)
/// the extra minute is carried over, so the output is `1m 0.00s` /
/// `2m 0.00s` rather than `60.00s` / `1m 60.00s`.
///
/// # Arguments
///
/// * `ms` - Duration in milliseconds
///
/// # Returns
///
/// A formatted string with the duration and appropriate unit.
///
/// # Examples
///
/// ```rust
/// use tree_parser::format_duration;
///
/// assert_eq!(format_duration(500), "500ms");
/// assert_eq!(format_duration(1500), "1.50s");
/// assert_eq!(format_duration(65000), "1m 5.00s");
/// assert_eq!(format_duration(119_999), "2m 0.00s");
/// ```
pub fn format_duration(ms: u64) -> String {
    if ms < 1000 {
        return format!("{}ms", ms);
    }

    let mut minutes = ms / 60_000;
    let mut seconds = format!("{:.2}", (ms % 60_000) as f64 / 1000.0);

    // Compare the already-rounded text so the carry decision always agrees
    // with what would have been printed.
    if seconds == "60.00" {
        minutes += 1;
        seconds = String::from("0.00");
    }

    if minutes == 0 {
        format!("{}s", seconds)
    } else {
        format!("{}m {}s", minutes, seconds)
    }
}
286
/// Check that a path exists on the filesystem.
///
/// The check succeeds when metadata for the path can be read. It does not
/// distinguish between regular files and directories, so an existing
/// directory also yields `true`. Any error while querying the filesystem
/// (missing entry, permission problem, ...) is reported as `false`.
///
/// # Arguments
///
/// * `path` - The file path to validate
///
/// # Returns
///
/// `true` if the path exists, `false` otherwise.
///
/// # Examples
///
/// ```rust
/// use tree_parser::is_valid_file_path;
///
/// // This will depend on your actual filesystem
/// let exists = is_valid_file_path("Cargo.toml");
/// ```
pub fn is_valid_file_path(path: &str) -> bool {
    // Readable metadata means the entry exists.
    std::fs::metadata(path).is_ok()
}
310
/// Check that a path exists and refers to a directory.
///
/// A path that does not exist, cannot be inspected, or points at something
/// other than a directory (such as a regular file) yields `false`.
///
/// # Arguments
///
/// * `path` - The directory path to validate
///
/// # Returns
///
/// `true` if the path is an existing directory, `false` otherwise.
///
/// # Examples
///
/// ```rust
/// use tree_parser::is_valid_directory_path;
///
/// // This will depend on your actual filesystem
/// let exists = is_valid_directory_path("src");
/// ```
pub fn is_valid_directory_path(path: &str) -> bool {
    // A failed metadata lookup covers the "does not exist" case.
    match std::fs::metadata(path) {
        Ok(meta) => meta.is_dir(),
        Err(_) => false,
    }
}
336
/// Return the lowercase extension of a file path.
///
/// Only the final path component is inspected and only the part after its
/// last dot is returned (`"archive.tar.GZ"` gives `"gz"`). Names without an
/// embedded dot, including dot-files such as `".bashrc"`, have no extension.
///
/// # Arguments
///
/// * `path` - The file path to extract the extension from
///
/// # Returns
///
/// `Some(String)` containing the lowercase extension if present, `None` otherwise.
///
/// # Examples
///
/// ```rust
/// use tree_parser::get_file_extension;
///
/// assert_eq!(get_file_extension("test.py"), Some("py".to_string()));
/// assert_eq!(get_file_extension("test.RS"), Some("rs".to_string()));
/// assert_eq!(get_file_extension("test"), None);
/// assert_eq!(get_file_extension("path/to/file.js"), Some("js".to_string()));
/// ```
pub fn get_file_extension(path: &str) -> Option<String> {
    let raw_extension = std::path::Path::new(path).extension()?;
    let extension = raw_extension.to_str()?;
    Some(extension.to_lowercase())
}
366
/// Return the final path component with its extension removed.
///
/// Directory components are dropped and only the last extension is
/// stripped (`"a.tar.gz"` gives `"a.tar"`). A name without an extension is
/// returned unchanged. Paths with no final component, such as the empty
/// string, yield `None`.
///
/// # Arguments
///
/// * `path` - The file path to extract the name from
///
/// # Returns
///
/// `Some(String)` containing the file name without extension if present, `None` otherwise.
///
/// # Examples
///
/// ```rust
/// use tree_parser::get_file_name_without_extension;
///
/// assert_eq!(get_file_name_without_extension("test.py"), Some("test".to_string()));
/// assert_eq!(get_file_name_without_extension("path/to/file.rs"), Some("file".to_string()));
/// assert_eq!(get_file_name_without_extension("no_extension"), Some("no_extension".to_string()));
/// ```
pub fn get_file_name_without_extension(path: &str) -> Option<String> {
    let raw_stem = std::path::Path::new(path).file_stem()?;
    let stem = raw_stem.to_str()?;
    Some(String::from(stem))
}
395
/// Check whether a path contains any of the given ignore patterns.
///
/// Matching is a plain, case-sensitive substring test: a pattern matches if
/// it occurs anywhere in `path`, not only as a whole path component (so
/// `"target"` also matches `"src/targeting.rs"`). An empty pattern matches
/// every path, and an empty pattern list matches nothing.
///
/// # Arguments
///
/// * `path` - The file path to check
/// * `patterns` - Array of patterns to match against
///
/// # Returns
///
/// `true` if the path matches any pattern, `false` otherwise.
///
/// # Examples
///
/// ```rust
/// use tree_parser::matches_ignore_patterns;
///
/// let patterns = vec!["target".to_string(), "node_modules".to_string()];
///
/// assert!(matches_ignore_patterns("src/target/debug", &patterns));
/// assert!(matches_ignore_patterns("frontend/node_modules/react", &patterns));
/// assert!(!matches_ignore_patterns("src/main.rs", &patterns));
/// ```
pub fn matches_ignore_patterns(path: &str, patterns: &[String]) -> bool {
    patterns
        .iter()
        .any(|candidate| path.contains(candidate.as_str()))
}
430
/// Sanitize a file path for safe usage.
///
/// The path is cleaned in three steps:
///
/// 1. Every occurrence of `".."` is removed. This is a textual removal, so
///    it also affects names that merely contain two consecutive dots
///    (`"file..txt"` becomes `"filetxt"`).
/// 2. Runs of consecutive `/` are collapsed into a single `/`, regardless of
///    how long the run is. Removing `".."` frequently leaves such runs
///    behind (`"a/../../b"` becomes `"a///b"` after step 1).
/// 3. Leading `/` characters are dropped, making the result relative.
///
/// Only `/` is treated as a separator; backslashes are left untouched.
///
/// # Arguments
///
/// * `path` - The file path to sanitize
///
/// # Returns
///
/// A sanitized version of the input path.
///
/// # Examples
///
/// ```rust
/// use tree_parser::sanitize_path;
///
/// assert_eq!(sanitize_path("../../../etc/passwd"), "etc/passwd");
/// assert_eq!(sanitize_path("src//main.rs"), "src/main.rs");
/// assert_eq!(sanitize_path("/absolute/path"), "absolute/path");
/// assert_eq!(sanitize_path("a/../../b"), "a/b");
/// ```
pub fn sanitize_path(path: &str) -> String {
    let without_parent_refs = path.replace("..", "");

    let mut sanitized = String::with_capacity(without_parent_refs.len());
    for ch in without_parent_refs.chars() {
        // A slash is redundant when it would start the result (leading
        // slash) or directly follow another slash (repeated separator).
        // A single replace("//", "/") pass would miss runs of three or more.
        let redundant_slash = ch == '/' && (sanitized.is_empty() || sanitized.ends_with('/'));
        if !redundant_slash {
            sanitized.push(ch);
        }
    }

    sanitized
}
460
#[cfg(test)]
mod tests {
    use super::*;

    /// Byte counts below, at, and above the KB and MB thresholds.
    #[test]
    fn test_format_file_size() {
        let cases = [
            (512, "512 B"),
            (1024, "1.00 KB"),
            (1536, "1.50 KB"),
            (1048576, "1.00 MB"),
        ];
        for (bytes, expected) in cases.iter() {
            assert_eq!(format_file_size(*bytes), *expected);
        }
    }

    /// One sample for each of the ms / s / m output shapes.
    #[test]
    fn test_format_duration() {
        let cases = [(500, "500ms"), (1500, "1.50s"), (65000, "1m 5.00s")];
        for (ms, expected) in cases.iter() {
            assert_eq!(format_duration(*ms), *expected);
        }
    }

    /// Round-trips between language names and `Language` values.
    #[test]
    fn test_language_conversion() {
        let parse_cases = vec![
            ("python", Some(Language::Python)),
            ("rust", Some(Language::Rust)),
            ("invalid", None),
        ];
        for (input, expected) in parse_cases {
            assert_eq!(language_from_string(input), expected);
        }

        let display_cases = vec![(Language::Python, "Python"), (Language::Rust, "Rust")];
        for (language, expected) in display_cases {
            assert_eq!(language_to_string(&language), expected);
        }
    }

    /// Extensions are lowercased; names without a dot have none.
    #[test]
    fn test_file_extension() {
        let cases = [
            ("test.py", Some("py".to_string())),
            ("test.RS", Some("rs".to_string())),
            ("test", None),
        ];
        for (path, expected) in cases.iter() {
            assert_eq!(&get_file_extension(path), expected);
        }
    }

    /// Known extensions are accepted and an unknown one is rejected.
    #[test]
    fn test_supported_extensions() {
        for extension in ["py", "rs", "js"].iter() {
            assert!(is_supported_extension(extension));
        }
        assert!(!is_supported_extension("xyz"));
    }
}