// omnivore_cli/git/utils.rs

1use anyhow::Result;
2use std::path::Path;
3
4#[allow(dead_code)]
5pub fn is_text_file(path: &Path) -> bool {
6    if let Some(extension) = path.extension() {
7        if let Some(ext_str) = extension.to_str() {
8            return is_text_extension(ext_str);
9        }
10    }
11    
12    if let Some(file_name) = path.file_name() {
13        if let Some(name_str) = file_name.to_str() {
14            return is_text_filename(name_str);
15        }
16    }
17    
18    false
19}
20
/// Returns `true` when `ext` (a file extension without the leading dot) is
/// one of the known text-file extensions.
///
/// The comparison is case-insensitive: `ext` is lowercased before the lookup.
#[allow(dead_code)]
fn is_text_extension(ext: &str) -> bool {
    // Grouped roughly by category; every entry is already lowercase.
    const TEXT_EXTENSIONS: &[&str] = &[
        // Plain text and markup prose
        "txt", "md", "markdown", "rst", "adoc", "org",
        // Systems languages
        "rs", "go", "c", "cpp", "cc", "cxx", "h", "hpp", "hxx",
        // JVM languages
        "java", "kt", "scala", "groovy", "clj", "cljs",
        // Scripting languages
        "py", "pyi", "rb", "lua", "perl", "pl", "pm",
        // JavaScript / TypeScript
        "js", "jsx", "ts", "tsx", "mjs", "cjs",
        // Markup
        "html", "htm", "xml", "xhtml", "svg",
        // Stylesheets
        "css", "scss", "sass", "less", "styl",
        // Configuration and data
        "json", "yaml", "yml", "toml", "ini", "cfg", "conf",
        // Shell and batch scripts
        "sh", "bash", "zsh", "fish", "ps1", "bat", "cmd",
        // Query languages
        "sql", "graphql", "gql",
        // Build files
        "dockerfile", "makefile", "cmake",
        // Front-end and mobile
        "vue", "svelte", "elm", "dart", "swift",
        // Scientific and newer languages
        "r", "matlab", "julia", "nim", "zig", "v",
        // Server-side web
        "php", "asp", "aspx", "jsp",
        // Interface definitions
        "proto", "thrift", "avro",
        // Infrastructure as code
        "tf", "tfvars", "hcl",
        // Configuration languages
        "nix", "dhall", "jsonnet",
        // Editor and Lisp
        "vim", "el", "lisp",
        // Assembly
        "asm", "s",
        // TeX
        "tex", "bib", "sty", "cls",
    ];

    let lowered = ext.to_lowercase();
    TEXT_EXTENSIONS.contains(&lowered.as_str())
}
47
/// Returns `true` when `name` (a bare file name, without directories) is one
/// that conventionally holds text.
///
/// Two rules apply:
/// * a case-insensitive match against a fixed list of well-known names
///   (`README`, `Makefile`, `go.mod`, `.gitignore`, ...);
/// * any dotfile with no further dot after the leading one, such as
///   `.bashrc`. Dotfiles that contain another dot (e.g. `.env.local`) are
///   accepted only if they appear in the fixed list.
#[allow(dead_code)]
fn is_text_filename(name: &str) -> bool {
    let is_known_name = matches!(
        name.to_lowercase().as_str(),
        "readme" | "license" | "contributing" | "changelog" | "authors" |
        "todo" | "notes" | "install" | "copyright" | "patents" |
        "makefile" | "dockerfile" | "gemfile" | "rakefile" | "gulpfile" |
        "gruntfile" | "package.json" | "cargo.toml" | "go.mod" | "go.sum" |
        "requirements.txt" | "setup.py" | "setup.cfg" | "pyproject.toml" |
        ".gitignore" | ".dockerignore" | ".npmignore" | ".eslintrc" |
        ".prettierrc" | ".editorconfig" | ".gitattributes" | ".env" |
        ".env.example" | ".env.sample" | ".env.template"
    );

    // The leading '.' itself would always satisfy `contains('.')`, so search
    // for additional dots only in what follows it. A lone "." is not a file.
    let is_plain_dotfile = name
        .strip_prefix('.')
        .map_or(false, |rest| !rest.is_empty() && !rest.contains('.'));

    is_known_name || is_plain_dotfile
}
62
63#[allow(dead_code)]
64pub fn parse_size_string(size_str: &str) -> Result<u64> {
65    let size_str = size_str.trim().to_uppercase();
66    
67    if let Ok(bytes) = size_str.parse::<u64>() {
68        return Ok(bytes);
69    }
70    
71    let (number_part, unit_part) = split_size_string(&size_str)?;
72    let number: f64 = number_part
73        .parse()
74        .map_err(|_| anyhow::anyhow!("Invalid number: {}", number_part))?;
75    
76    let multiplier = match unit_part.as_str() {
77        "B" | "" => 1_u64,
78        "K" | "KB" => 1_024,
79        "M" | "MB" => 1_048_576,
80        "G" | "GB" => 1_073_741_824,
81        "T" | "TB" => 1_099_511_627_776,
82        _ => return Err(anyhow::anyhow!("Unknown size unit: {}", unit_part)),
83    };
84    
85    Ok((number * multiplier as f64) as u64)
86}
87
88#[allow(dead_code)]
89fn split_size_string(s: &str) -> Result<(String, String)> {
90    let mut number_part = String::new();
91    let mut unit_part = String::new();
92    let mut found_unit = false;
93    
94    for ch in s.chars() {
95        if ch.is_ascii_digit() || ch == '.' {
96            if found_unit {
97                return Err(anyhow::anyhow!("Invalid size format: {}", s));
98            }
99            number_part.push(ch);
100        } else if ch.is_ascii_alphabetic() {
101            found_unit = true;
102            unit_part.push(ch);
103        } else if !ch.is_whitespace() {
104            return Err(anyhow::anyhow!("Invalid character in size: {}", ch));
105        }
106    }
107    
108    if number_part.is_empty() {
109        return Err(anyhow::anyhow!("No number found in size: {}", s));
110    }
111    
112    Ok((number_part, unit_part))
113}
114
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare byte counts and unit-suffixed sizes (with and without a space)
    /// resolve to the expected number of bytes.
    #[test]
    fn test_parse_size_string() {
        let cases: [(&str, u64); 6] = [
            ("1024", 1024),
            ("10KB", 10240),
            ("10 KB", 10240),
            ("1.5MB", 1572864),
            ("2GB", 2147483648),
            ("2 GB", 2147483648),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(parse_size_string(input).unwrap(), expected);
        }
    }

    /// Source files, well-known names and dotfiles are text; common binary
    /// extensions are not.
    #[test]
    fn test_is_text_file() {
        let text_names = ["test.rs", "README.md", "Dockerfile", ".gitignore"];
        let binary_names = ["image.png", "binary.exe"];

        for &name in text_names.iter() {
            assert!(is_text_file(Path::new(name)), "expected text: {}", name);
        }
        for &name in binary_names.iter() {
            assert!(!is_text_file(Path::new(name)), "expected non-text: {}", name);
        }
    }
}