tree_parser/
utils.rs

1//! Utility functions for the tree parser library
2
3use crate::Language;
4
5/// Check if a file extension is supported by the parser
6/// 
7/// This function determines whether the tree parser library can handle
8/// files with the given extension by checking against the list of
9/// supported programming languages.
10/// 
11/// # Arguments
12/// 
13/// * `extension` - The file extension to check (without the dot)
14/// 
15/// # Returns
16/// 
17/// `true` if the extension is supported, `false` otherwise.
18/// 
19/// # Examples
20/// 
21/// ```rust
22/// use tree_parser::is_supported_extension;
23/// 
24/// assert!(is_supported_extension("py"));
25/// assert!(is_supported_extension("rs"));
26/// assert!(is_supported_extension("js"));
27/// assert!(!is_supported_extension("xyz"));
28/// ```
29pub fn is_supported_extension(extension: &str) -> bool {
30    crate::languages::detect_language_by_extension(&format!("file.{}", extension)).is_some()
31}
32
/// List every file extension the parser advertises as supported.
///
/// The list is a fixed table baked into this function; it is handy for
/// filtering files or populating file-selection interfaces. Entries are
/// returned in table order and without the leading dot. Note that the table
/// contains both `"r"` and `"R"` as separate entries.
///
/// # Returns
///
/// A freshly allocated vector of owned extension strings.
///
/// # Examples
///
/// ```rust
/// use tree_parser::get_supported_extensions;
///
/// let extensions = get_supported_extensions();
/// println!("Supported extensions: {:?}", extensions);
///
/// // Check if a specific extension is in the list
/// assert!(extensions.contains(&"py".to_string()));
/// assert!(extensions.contains(&"rs".to_string()));
/// ```
pub fn get_supported_extensions() -> Vec<String> {
    // Static table; converted to owned strings only at the very end.
    const EXTENSIONS: &[&str] = &[
        "py", "pyw", "pyi",
        "rs",
        "js", "mjs", "cjs",
        "ts", "mts", "cts",
        "java",
        "c", "h",
        "cpp", "cc", "cxx", "c++",
        "hpp", "hh", "hxx", "h++",
        "go",
        "cs",
        "php", "phtml", "php3", "php4", "php5", "phps",
        "rb", "rbw",
        "swift",
        "kt", "kts",
        "scala", "sc",
        "hs", "lhs",
        "lua",
        "pl", "pm", "t", "pod",
        "r", "R",
        "sh", "bash", "zsh", "fish",
        "ps1", "psm1", "psd1",
        "html", "htm", "xhtml",
        "css",
        "sql",
        "json",
        "yaml", "yml",
        "toml",
        "xml", "xsd", "xsl", "xslt",
    ];

    EXTENSIONS.iter().copied().map(String::from).collect()
}
88
89/// Convert a string representation to a Language enum
90/// 
91/// This function parses various string representations of programming
92/// language names and returns the corresponding `Language` enum value.
93/// The parsing is case-insensitive and supports multiple aliases for
94/// each language.
95/// 
96/// # Arguments
97/// 
98/// * `lang_str` - String representation of the language name
99/// 
100/// # Returns
101/// 
102/// `Some(Language)` if the string is recognized, `None` otherwise.
103/// 
104/// # Supported Aliases
105/// 
106/// - **Python**: "python", "py"
107/// - **Rust**: "rust", "rs"
108/// - **JavaScript**: "javascript", "js"
109/// - **TypeScript**: "typescript", "ts"
110/// - **C++**: "cpp", "c++", "cxx"
111/// - **C#**: "csharp", "c#", "cs"
112/// - And many more...
113/// 
114/// # Examples
115/// 
116/// ```rust
117/// use tree_parser::{language_from_string, Language};
118/// 
119/// assert_eq!(language_from_string("python"), Some(Language::Python));
120/// assert_eq!(language_from_string("RUST"), Some(Language::Rust));
121/// assert_eq!(language_from_string("js"), Some(Language::JavaScript));
122/// assert_eq!(language_from_string("unknown"), None);
123/// ```
124pub fn language_from_string(lang_str: &str) -> Option<Language> {
125    match lang_str.to_lowercase().as_str() {
126        "python" | "py" => Some(Language::Python),
127        "rust" | "rs" => Some(Language::Rust),
128        "javascript" | "js" => Some(Language::JavaScript),
129        "typescript" | "ts" => Some(Language::TypeScript),
130        "java" => Some(Language::Java),
131        "c" => Some(Language::C),
132        "cpp" | "c++" | "cxx" => Some(Language::Cpp),
133        "go" | "golang" => Some(Language::Go),
134        "csharp" | "c#" | "cs" => Some(Language::CSharp),
135        "php" => Some(Language::Php),
136        "ruby" | "rb" => Some(Language::Ruby),
137        "swift" => Some(Language::Swift),
138        "kotlin" | "kt" => Some(Language::Kotlin),
139        "scala" => Some(Language::Scala),
140        "haskell" | "hs" => Some(Language::Haskell),
141        "lua" => Some(Language::Lua),
142        "perl" | "pl" => Some(Language::Perl),
143        "r" => Some(Language::R),
144        "bash" | "sh" => Some(Language::Bash),
145        "powershell" | "ps1" => Some(Language::PowerShell),
146        "html" => Some(Language::Html),
147        "css" => Some(Language::Css),
148        "sql" => Some(Language::Sql),
149        "json" => Some(Language::Json),
150        "yaml" | "yml" => Some(Language::Yaml),
151        "toml" => Some(Language::Toml),
152        "xml" => Some(Language::Xml),
153        _ => None,
154    }
155}
156
157/// Convert a Language enum to its string representation
158/// 
159/// This function converts a `Language` enum value to a human-readable
160/// string representation. The returned strings use proper capitalization
161/// and are suitable for display purposes.
162/// 
163/// # Arguments
164/// 
165/// * `language` - The Language enum value to convert
166/// 
167/// # Returns
168/// 
169/// A string representation of the language name.
170/// 
171/// # Examples
172/// 
173/// ```rust
174/// use tree_parser::{language_to_string, Language};
175/// 
176/// assert_eq!(language_to_string(&Language::Python), "Python");
177/// assert_eq!(language_to_string(&Language::Rust), "Rust");
178/// assert_eq!(language_to_string(&Language::JavaScript), "JavaScript");
179/// assert_eq!(language_to_string(&Language::Cpp), "C++");
180/// ```
181pub fn language_to_string(language: &Language) -> String {
182    match language {
183        Language::Python => "Python".to_string(),
184        Language::Rust => "Rust".to_string(),
185        Language::JavaScript => "JavaScript".to_string(),
186        Language::TypeScript => "TypeScript".to_string(),
187        Language::Java => "Java".to_string(),
188        Language::C => "C".to_string(),
189        Language::Cpp => "C++".to_string(),
190        Language::Go => "Go".to_string(),
191        Language::CSharp => "C#".to_string(),
192        Language::Php => "PHP".to_string(),
193        Language::Ruby => "Ruby".to_string(),
194        Language::Swift => "Swift".to_string(),
195        Language::Kotlin => "Kotlin".to_string(),
196        Language::Scala => "Scala".to_string(),
197        Language::Haskell => "Haskell".to_string(),
198        Language::Lua => "Lua".to_string(),
199        Language::Perl => "Perl".to_string(),
200        Language::R => "R".to_string(),
201        Language::Bash => "Bash".to_string(),
202        Language::PowerShell => "PowerShell".to_string(),
203        Language::Html => "HTML".to_string(),
204        Language::Css => "CSS".to_string(),
205        Language::Sql => "SQL".to_string(),
206        Language::Json => "JSON".to_string(),
207        Language::Yaml => "YAML".to_string(),
208        Language::Toml => "TOML".to_string(),
209        Language::Xml => "XML".to_string(),
210    }
211}
212
/// Format a byte count as a human-readable size.
///
/// Sizes below 1024 bytes are printed as an exact integer followed by `B`.
/// Larger sizes are divided by 1024 until they fit the largest applicable
/// unit (`KB`, `MB`, `GB`, `TB`) and printed with two decimals. Values beyond
/// the terabyte range stay in `TB`.
///
/// A value that would *round* to `1024.00` of a unit is promoted to the next
/// unit, so 1 048 575 bytes is rendered as `"1.00 MB"` rather than
/// `"1024.00 KB"`.
///
/// # Arguments
///
/// * `bytes` - File size in bytes
///
/// # Returns
///
/// A formatted string with the size and appropriate unit.
///
/// # Examples
///
/// ```rust
/// use tree_parser::format_file_size;
///
/// assert_eq!(format_file_size(512), "512 B");
/// assert_eq!(format_file_size(1024), "1.00 KB");
/// assert_eq!(format_file_size(1536), "1.50 KB");
/// assert_eq!(format_file_size(1048576), "1.00 MB");
/// assert_eq!(format_file_size(1048575), "1.00 MB");
/// ```
pub fn format_file_size(bytes: usize) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;
    let last_unit = UNITS.len() - 1;

    // Plain byte counts are printed exactly, without going through floats.
    if bytes < 1024 {
        return format!("{} {}", bytes, UNITS[0]);
    }

    let mut size = bytes as f64;
    let mut unit_index = 0;
    while size >= STEP && unit_index < last_unit {
        size /= STEP;
        unit_index += 1;
    }

    // Two-decimal rounding can push e.g. 1023.999 up to "1024.00"; move such
    // values to the next unit so the output never shows 1024 of a unit that
    // has a larger sibling.
    if unit_index < last_unit && (size * 100.0).round() >= STEP * 100.0 {
        size /= STEP;
        unit_index += 1;
    }

    format!("{:.2} {}", size, UNITS[unit_index])
}
252
/// Format a millisecond duration as a human-readable string.
///
/// * below one second: whole milliseconds, e.g. `"500ms"`
/// * below one minute: seconds with two decimals, e.g. `"1.50s"`
/// * otherwise: whole minutes plus seconds with two decimals, e.g. `"1m 5.00s"`
///
/// When the seconds part would round up to `60.00` it is carried into the
/// minutes, so 59 999 ms is rendered as `"1m 0.00s"` instead of `"60.00s"`
/// and 119 999 ms as `"2m 0.00s"` instead of `"1m 60.00s"`.
///
/// # Arguments
///
/// * `ms` - Duration in milliseconds
///
/// # Returns
///
/// A formatted string with the duration and appropriate unit.
///
/// # Examples
///
/// ```rust
/// use tree_parser::format_duration;
///
/// assert_eq!(format_duration(500), "500ms");
/// assert_eq!(format_duration(1500), "1.50s");
/// assert_eq!(format_duration(65000), "1m 5.00s");
/// assert_eq!(format_duration(59_999), "1m 0.00s");
/// ```
pub fn format_duration(ms: u64) -> String {
    if ms < 1000 {
        return format!("{}ms", ms);
    }

    let mut minutes = ms / 60_000;
    let mut seconds = format!("{:.2}", (ms % 60_000) as f64 / 1000.0);

    // Comparing the rendered text catches exactly the inputs whose seconds
    // round up to a full minute, without changing rounding anywhere else.
    if seconds == "60.00" {
        minutes += 1;
        seconds = String::from("0.00");
    }

    if minutes == 0 {
        format!("{}s", seconds)
    } else {
        format!("{}m {}s", minutes, seconds)
    }
}
286
/// Check that a path refers to something that exists on the filesystem.
///
/// This is a plain existence check via [`std::path::Path::exists`]: it returns
/// `true` for any existing entry, directories included, and `false` when the
/// path does not exist or cannot be inspected.
///
/// # Arguments
///
/// * `path` - The path to check
///
/// # Returns
///
/// `true` if the path exists, `false` otherwise.
///
/// # Examples
///
/// ```rust
/// use tree_parser::is_valid_file_path;
///
/// // This will depend on your actual filesystem
/// let exists = is_valid_file_path("Cargo.toml");
/// ```
pub fn is_valid_file_path(path: &str) -> bool {
    let candidate = std::path::Path::new(path);
    candidate.exists()
}
310
/// Check that a path exists and is a directory.
///
/// Relies on [`std::path::Path::is_dir`], which already returns `false` for
/// paths that do not exist or cannot be inspected, so no separate existence
/// check is needed.
///
/// # Arguments
///
/// * `path` - The directory path to validate
///
/// # Returns
///
/// `true` if the path exists and is a directory, `false` otherwise
/// (including when it names a regular file).
///
/// # Examples
///
/// ```rust
/// use tree_parser::is_valid_directory_path;
///
/// // This will depend on your actual filesystem
/// let exists = is_valid_directory_path("src");
/// ```
pub fn is_valid_directory_path(path: &str) -> bool {
    // `is_dir` implies existence; one filesystem query instead of two.
    std::path::Path::new(path).is_dir()
}
336
/// Return the lowercased extension of a path, if it has one.
///
/// The extension is determined by [`std::path::Path::extension`], i.e. the
/// text after the final dot of the last path component. Useful for language
/// detection and file filtering.
///
/// # Arguments
///
/// * `path` - The file path to extract the extension from
///
/// # Returns
///
/// `Some(String)` with the extension in lowercase, or `None` when the path
/// has no extension.
///
/// # Examples
///
/// ```rust
/// use tree_parser::get_file_extension;
///
/// assert_eq!(get_file_extension("test.py"), Some("py".to_string()));
/// assert_eq!(get_file_extension("test.RS"), Some("rs".to_string()));
/// assert_eq!(get_file_extension("test"), None);
/// assert_eq!(get_file_extension("path/to/file.js"), Some("js".to_string()));
/// ```
pub fn get_file_extension(path: &str) -> Option<String> {
    let raw_extension = std::path::Path::new(path).extension()?;
    let extension = raw_extension.to_str()?;
    Some(extension.to_lowercase())
}
366
/// Return the final path component with its extension removed.
///
/// The value comes from [`std::path::Path::file_stem`]: directory parts are
/// dropped and only the last extension is stripped, so `archive.tar.gz`
/// yields `archive.tar`.
///
/// # Arguments
///
/// * `path` - The file path to extract the name from
///
/// # Returns
///
/// `Some(String)` with the file name minus its extension, or `None` when the
/// path has no file name component (for example an empty path).
///
/// # Examples
///
/// ```rust
/// use tree_parser::get_file_name_without_extension;
///
/// assert_eq!(get_file_name_without_extension("test.py"), Some("test".to_string()));
/// assert_eq!(get_file_name_without_extension("path/to/file.rs"), Some("file".to_string()));
/// assert_eq!(get_file_name_without_extension("no_extension"), Some("no_extension".to_string()));
/// ```
pub fn get_file_name_without_extension(path: &str) -> Option<String> {
    let stem = std::path::Path::new(path).file_stem();
    stem.and_then(std::ffi::OsStr::to_str).map(String::from)
}
395
/// Check whether a path contains any of the given ignore patterns.
///
/// Matching is a plain substring test: a pattern matches when it occurs
/// anywhere in `path`, not only as a whole path component (so `"target"`
/// also matches `"src/targets.rs"`). No glob or regex syntax is interpreted,
/// and an empty pattern matches every path.
///
/// # Arguments
///
/// * `path` - The file path to check
/// * `patterns` - Substrings to look for
///
/// # Returns
///
/// `true` if at least one pattern occurs in the path, `false` otherwise
/// (including when `patterns` is empty).
///
/// # Examples
///
/// ```rust
/// use tree_parser::matches_ignore_patterns;
///
/// let patterns = vec!["target".to_string(), "node_modules".to_string()];
///
/// assert!(matches_ignore_patterns("src/target/debug", &patterns));
/// assert!(matches_ignore_patterns("frontend/node_modules/react", &patterns));
/// assert!(!matches_ignore_patterns("src/main.rs", &patterns));
/// ```
pub fn matches_ignore_patterns(path: &str, patterns: &[String]) -> bool {
    patterns
        .iter()
        .any(|pattern| path.contains(pattern.as_str()))
}
430
/// Sanitize a file path for safe usage.
///
/// The function applies three purely textual steps:
///
/// 1. every occurrence of `".."` is removed (this also affects names that
///    merely contain two consecutive dots, e.g. `file..txt` becomes `filetxt`);
/// 2. leading `/` characters are stripped, making the result relative;
/// 3. every run of consecutive `/` characters is collapsed into a single `/`,
///    regardless of the run's length.
///
/// Backslashes are left untouched and the filesystem is never consulted, so
/// this is a best-effort textual clean-up rather than a full canonicalisation.
///
/// # Arguments
///
/// * `path` - The file path to sanitize
///
/// # Returns
///
/// A sanitized version of the input path.
///
/// # Examples
///
/// ```rust
/// use tree_parser::sanitize_path;
///
/// assert_eq!(sanitize_path("../../../etc/passwd"), "etc/passwd");
/// assert_eq!(sanitize_path("src//main.rs"), "src/main.rs");
/// assert_eq!(sanitize_path("/absolute/path"), "absolute/path");
/// assert_eq!(sanitize_path("a///b"), "a/b");
/// ```
pub fn sanitize_path(path: &str) -> String {
    let without_parents = path.replace("..", "");
    let relative = without_parents.trim_start_matches('/');

    // A single `replace("//", "/")` pass leaves "a///b" as "a//b"; walking the
    // characters collapses slash runs of any length in one go.
    let mut sanitized = String::with_capacity(relative.len());
    for ch in relative.chars() {
        if ch == '/' && sanitized.ends_with('/') {
            continue;
        }
        sanitized.push(ch);
    }
    sanitized
}
460
// Unit tests for the utility helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_format_file_size() {
        assert_eq!(format_file_size(0), "0 B");
        assert_eq!(format_file_size(512), "512 B");
        assert_eq!(format_file_size(1024), "1.00 KB");
        assert_eq!(format_file_size(1536), "1.50 KB");
        assert_eq!(format_file_size(1048576), "1.00 MB");
    }

    #[test]
    fn test_format_duration() {
        assert_eq!(format_duration(0), "0ms");
        assert_eq!(format_duration(500), "500ms");
        assert_eq!(format_duration(1500), "1.50s");
        assert_eq!(format_duration(65000), "1m 5.00s");
    }

    #[test]
    fn test_language_conversion() {
        assert_eq!(language_from_string("python"), Some(Language::Python));
        assert_eq!(language_from_string("rust"), Some(Language::Rust));
        // Lookup is case-insensitive and accepts short aliases.
        assert_eq!(language_from_string("RUST"), Some(Language::Rust));
        assert_eq!(language_from_string("js"), Some(Language::JavaScript));
        assert_eq!(language_from_string("invalid"), None);

        assert_eq!(language_to_string(&Language::Python), "Python");
        assert_eq!(language_to_string(&Language::Rust), "Rust");
        assert_eq!(language_to_string(&Language::Cpp), "C++");
    }

    #[test]
    fn test_file_extension() {
        assert_eq!(get_file_extension("test.py"), Some("py".to_string()));
        assert_eq!(get_file_extension("test.RS"), Some("rs".to_string()));
        assert_eq!(get_file_extension("test"), None);
    }

    #[test]
    fn test_file_name_without_extension() {
        assert_eq!(
            get_file_name_without_extension("path/to/file.rs"),
            Some("file".to_string())
        );
        assert_eq!(
            get_file_name_without_extension("no_extension"),
            Some("no_extension".to_string())
        );
        assert_eq!(get_file_name_without_extension(""), None);
    }

    #[test]
    fn test_supported_extensions() {
        assert!(is_supported_extension("py"));
        assert!(is_supported_extension("rs"));
        assert!(is_supported_extension("js"));
        assert!(!is_supported_extension("xyz"));

        let extensions = get_supported_extensions();
        assert!(extensions.contains(&"py".to_string()));
        assert!(extensions.contains(&"rs".to_string()));
        assert!(!extensions.contains(&"xyz".to_string()));
    }

    #[test]
    fn test_matches_ignore_patterns() {
        let patterns = vec!["target".to_string(), "node_modules".to_string()];
        assert!(matches_ignore_patterns("src/target/debug", &patterns));
        assert!(matches_ignore_patterns("frontend/node_modules/react", &patterns));
        assert!(!matches_ignore_patterns("src/main.rs", &patterns));

        let no_patterns: Vec<String> = Vec::new();
        assert!(!matches_ignore_patterns("src/target/debug", &no_patterns));
    }

    #[test]
    fn test_sanitize_path() {
        assert_eq!(sanitize_path("../../../etc/passwd"), "etc/passwd");
        assert_eq!(sanitize_path("src//main.rs"), "src/main.rs");
        assert_eq!(sanitize_path("/absolute/path"), "absolute/path");
        assert_eq!(sanitize_path(""), "");
    }
}