file_kitty/
encoding.rs

1//! Module for file encoding detection and conversion
2//! 
3//! This module provides functionality to:
4//! - Detect file encodings
5//! - Convert files to UTF-8
6//! - Skip binary files automatically
7//! - Process directories recursively
8
9use anyhow::Result;
10use chardetng::EncodingDetector;
11use encoding_rs::Encoding;
12use std::path::Path;
13use tokio::fs;
14use walkdir::WalkDir;
15use std::io::{self, Write};
16
/// Decides, from the file extension alone, whether a file is excluded from processing.
///
/// The extension is lowercased and looked up in a fixed list of well-known
/// binary formats (executables, archives, images, audio/video, office
/// documents and generic binary blobs). File contents are never inspected.
///
/// # Arguments
///
/// * `path` - The path whose extension is examined
///
/// # Returns
///
/// `true` if the extension is on the binary list; `false` if it is not, if
/// the path has no extension, or if the extension is not valid UTF-8.
fn should_skip_file(path: &Path) -> bool {
    const BINARY_EXTENSIONS: &[&str] = &[
        // Executables and shared libraries
        "exe", "dll", "so", "dylib",
        // Archives
        "zip", "rar", "7z", "gz", "tar",
        // Images
        "jpg", "jpeg", "png", "gif", "bmp", "ico",
        // Audio / video
        "mp3", "mp4", "avi", "mov", "wav",
        // Office documents
        "pdf", "doc", "docx", "xls", "xlsx",
        // Generic binary blobs
        "bin", "dat",
    ];

    path.extension()
        .and_then(|os_ext| os_ext.to_str())
        .map(|ext| ext.to_lowercase())
        .map_or(false, |lowered| BINARY_EXTENSIONS.contains(&lowered.as_str()))
}
49
50/// Scans a directory for non-UTF-8 encoded files and optionally converts them to UTF-8
51/// 
52/// # Arguments
53/// 
54/// * `dir_path` - The directory path to scan
55/// * `auto_convert` - Whether to automatically convert files to UTF-8
56/// * `verbose` - Whether to show detailed encoding information
57/// 
58/// # Returns
59/// 
60/// Returns `Ok(())` if the operation was successful, or an error if something went wrong
61/// 
62/// # Example
63/// 
64/// ```no_run
65/// use file_kitty::encoding::scan_directory;
66/// 
67/// #[tokio::main]
68/// async fn main() -> anyhow::Result<()> {
69///     scan_directory("./my_project", false, false).await?;
70///     Ok(())
71/// }
72/// ```
73pub async fn scan_directory(dir_path: &str, auto_convert: bool, verbose: bool) -> Result<()> {
74    let mut found_non_utf8 = false;
75    
76    // First scan and display all non-UTF-8 files
77    for entry in WalkDir::new(dir_path).into_iter().filter_map(|e| e.ok()) {
78        if entry.file_type().is_file() {
79            let path = entry.path();
80            if should_skip_file(path) {
81                if verbose {
82                    println!("Skipping binary file: {}", path.display());
83                }
84                continue;
85            }
86            
87            if let Some(encoding_info) = detect_file_encoding(path).await? {
88                if encoding_info.0 != "UTF-8" {
89                    found_non_utf8 = true;
90                    if verbose {
91                        let content = fs::read(path).await?;
92                        println!(
93                            "File: {}\nEncoding: {}\nSize: {} bytes\n",
94                            path.display(),
95                            encoding_info.0,
96                            content.len(),
97                        );
98                    } else {
99                        println!(
100                            "Find: {} encoding: {}",
101                            path.display(),
102                            encoding_info.0
103                        );
104                    }
105                }
106            }
107        }
108    }
109    
110    if !found_non_utf8 {
111        println!("No non-UTF-8 encoded files found");
112        return Ok(());
113    }
114    
115    let should_convert = if auto_convert {
116        true
117    } else {
118        // Ask user if they want to process these files
119        print!("\nDo you want to convert the above files to UTF-8 encoding? (y/n): ");
120        io::stdout().flush()?;
121        
122        let mut input = String::new();
123        io::stdin().read_line(&mut input)?;
124        
125        input.trim().eq_ignore_ascii_case("y")
126    };
127    
128    if should_convert {
129        // Iterate again and perform conversion
130        for entry in WalkDir::new(dir_path).into_iter().filter_map(|e| e.ok()) {
131            if entry.file_type().is_file() {
132                let path = entry.path();
133                if should_skip_file(path) {
134                    continue;
135                }
136                if let Some(encoding_info) = detect_file_encoding(path).await? {
137                    if encoding_info.0 != "UTF-8" {
138                        convert_to_utf8(path, encoding_info.1).await?;
139                        println!("File {} converted to UTF-8 encoding", path.display());
140                    }
141                }
142            }
143        }
144    }
145    
146    Ok(())
147}
148
149/// Detects the encoding of a file
150/// 
151/// # Arguments
152/// 
153/// * `path` - Path to the file to check
154/// 
155/// # Returns
156/// 
157/// Returns `Some((encoding_name, encoding))` if non-UTF-8, `None` if UTF-8
158async fn detect_file_encoding(path: &Path) -> Result<Option<(&'static str, &'static Encoding)>> {
159    let content = fs::read(path).await?;
160    let mut detector = EncodingDetector::new();
161    detector.feed(&content, true);
162    let encoding = detector.guess(None, true);
163    
164    if encoding.name() != "UTF-8" {
165        Ok(Some((encoding.name(), encoding)))
166    } else {
167        Ok(None)
168    }
169}
170
171/// Converts a file to UTF-8 encoding
172/// 
173/// # Arguments
174/// 
175/// * `path` - Path to the file to convert
176/// * `encoding` - The current encoding of the file
177/// 
178/// # Returns
179/// 
180/// Returns `Ok(())` if conversion was successful, or an error if something went wrong
181async fn convert_to_utf8(path: &Path, encoding: &'static Encoding) -> Result<()> {
182    let content = fs::read(path).await?;
183    let (decoded, _, had_errors) = encoding.decode(&content);
184    if had_errors {
185        println!("Warning: Error occurred during decoding");
186    }
187    
188    fs::write(path, decoded.as_bytes()).await?;
189    Ok(())
190}