file_kitty/
encoding.rs

//! Module for file encoding detection and conversion
//!
//! This module provides functionality to:
//! - Detect file encodings
//! - Convert files to UTF-8
//! - Skip binary files automatically (only files with an allowed extension are processed)
//! - Process directories recursively
8
9use anyhow::Result;
10use chardetng::EncodingDetector;
11use encoding_rs::Encoding;
12use std::path::Path;
13use tokio::fs;
14use walkdir::WalkDir;
15use std::io::{self, Write};
16
/// Checks if a file should be processed based on allowed file extensions
///
/// The comparison is ASCII case-insensitive. Entries in `allowed_extensions` may be
/// written with or without a leading dot (`"rs"` and `".rs"` are equivalent).
///
/// # Arguments
///
/// * `path` - The path to check
/// * `allowed_extensions` - The list of allowed file extensions
///
/// # Returns
///
/// `true` if the file should be processed, `false` otherwise. Paths without an
/// extension, or whose extension is not valid UTF-8, are never processed.
fn should_process_file(path: &Path, allowed_extensions: &[String]) -> bool {
    let ext = match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext,
        None => return false,
    };

    allowed_extensions.iter().any(|allowed| {
        // `Path::extension` never includes the dot, so drop one from the allow-list
        // entry if the caller supplied it; otherwise such an entry could never match.
        let allowed = allowed.strip_prefix('.').unwrap_or(allowed.as_str());
        allowed.eq_ignore_ascii_case(ext)
    })
}
35
/// Builds the default allow-list of text file extensions
///
/// The list is used by `scan_directory` when the caller does not supply its own
/// extensions. Entries are lowercase and carry no leading dot.
fn default_allowed_extensions() -> Vec<String> {
    const DEFAULT_EXTENSIONS: &[&str] = &[
        // C/C++
        "c", "h", "cpp", "hpp", "cc", "cxx",
        // Rust
        "rs",
        // TypeScript/JavaScript
        "ts", "tsx", "js", "jsx",
        // Plain text
        "txt",
        // Markup
        "html", "htm", "xml",
        // Python
        "py",
        // Java
        "java",
        // Go
        "go",
        // Markdown
        "md", "markdown",
        // Config files
        "json", "yaml", "yml", "toml",
        // Shell scripts
        "sh", "bash",
        // SQL
        "sql",
        // Stylesheets
        "css", "scss", "sass",
    ];

    DEFAULT_EXTENSIONS
        .iter()
        .map(|ext| (*ext).to_owned())
        .collect()
}
57
58/// Scans a directory for non-UTF-8 encoded files and optionally converts them to UTF-8
59///
60/// # Arguments
61///
62/// * `dir_path` - The directory path to scan
63/// * `convert` - Whether to automatically convert files to UTF-8
64/// * `verbose` - Whether to show detailed encoding information
65/// * `types` - Optional list of file extensions to process. If None, uses default list.
66///
67/// # Returns
68///
69/// Returns `Ok(())` if the operation was successful, or an error if something went wrong
70///
71/// # Example
72///
73/// ```no_run
74/// use file_kitty::encoding::scan_directory;
75///
76/// #[tokio::main]
77/// async fn main() -> anyhow::Result<()> {
78///     scan_directory("./my_project", false, false, None).await?;
79///     Ok(())
80/// }
81/// ```
82pub async fn scan_directory(dir_path: &str, convert: bool, verbose: bool, types: Option<Vec<String>>) -> Result<()> {
83    let allowed_extensions = types.unwrap_or_else(default_allowed_extensions);
84    let mut found_non_utf8 = false;
85    
86    // First scan and display all non-UTF-8 files
87    for entry in WalkDir::new(dir_path).into_iter().filter_map(|e| e.ok()) {
88        if entry.file_type().is_file() {
89            let path = entry.path();
90            if !should_process_file(path, &allowed_extensions) {
91                if verbose {
92                    println!("Skip: {}", path.display());
93                }
94                continue;
95            }
96            
97            if let Some(encoding_info) = detect_file_encoding(path).await? {
98                if encoding_info.0 != "UTF-8" {
99                    found_non_utf8 = true;
100                    if verbose {
101                        let content = fs::read(path).await?;
102                        println!(
103                            "File: {}\nEncoding: {}\nSize: {} bytes\n",
104                            path.display(),
105                            encoding_info.0,
106                            content.len(),
107                        );
108                    } else {
109                        println!("{} {}", path.display(), encoding_info.0);
110                    }
111                }
112            }
113        }
114    }
115    
116    if !found_non_utf8 {
117        return Ok(());
118    }
119    
120    let should_convert = if convert {
121        true
122    } else {
123        print!("\nConvert to UTF-8? (y/n): ");
124        io::stdout().flush()?;
125
126        let mut input = String::new();
127        io::stdin().read_line(&mut input)?;
128
129        input.trim().eq_ignore_ascii_case("y")
130    };
131    
132    if should_convert {
133        // Iterate again and perform conversion
134        for entry in WalkDir::new(dir_path).into_iter().filter_map(|e| e.ok()) {
135            if entry.file_type().is_file() {
136                let path = entry.path();
137                if !should_process_file(path, &allowed_extensions) {
138                    continue;
139                }
140                if let Some(encoding_info) = detect_file_encoding(path).await? {
141                    if encoding_info.0 != "UTF-8" {
142                        convert_to_utf8(path, encoding_info.1).await?;
143                        println!("Converted: {}", path.display());
144                    }
145                }
146            }
147        }
148    }
149    
150    Ok(())
151}
152
153/// Detects the encoding of a file
154/// 
155/// # Arguments
156/// 
157/// * `path` - Path to the file to check
158/// 
159/// # Returns
160/// 
161/// Returns `Some((encoding_name, encoding))` if non-UTF-8, `None` if UTF-8
162async fn detect_file_encoding(path: &Path) -> Result<Option<(&'static str, &'static Encoding)>> {
163    let content = fs::read(path).await?;
164    let mut detector = EncodingDetector::new();
165    detector.feed(&content, true);
166    let encoding = detector.guess(None, true);
167    
168    if encoding.name() != "UTF-8" {
169        Ok(Some((encoding.name(), encoding)))
170    } else {
171        Ok(None)
172    }
173}
174
175/// Converts a file to UTF-8 encoding
176/// 
177/// # Arguments
178/// 
179/// * `path` - Path to the file to convert
180/// * `encoding` - The current encoding of the file
181/// 
182/// # Returns
183/// 
184/// Returns `Ok(())` if conversion was successful, or an error if something went wrong
185async fn convert_to_utf8(path: &Path, encoding: &'static Encoding) -> Result<()> {
186    let content = fs::read(path).await?;
187    let (decoded, _, had_errors) = encoding.decode(&content);
188    if had_errors {
189        println!("Warning: decoding error in {}", path.display());
190    }
191
192    fs::write(path, decoded.as_bytes()).await?;
193    Ok(())
194}