file_kitty/encoding.rs
1//! Module for file encoding detection and conversion
2//!
3//! This module provides functionality to:
4//! - Detect file encodings
5//! - Convert files to UTF-8
6//! - Skip binary files automatically
7//! - Process directories recursively
8
9use anyhow::Result;
10use chardetng::EncodingDetector;
11use encoding_rs::Encoding;
12use std::path::Path;
13use tokio::fs;
14use walkdir::WalkDir;
15use std::io::{self, Write};
16
/// Decides whether a file should be left out of encoding processing.
///
/// The decision is based solely on the file extension, which is lowercased
/// and compared against a fixed list of well-known binary formats. Files
/// without an extension, or whose extension is not valid UTF-8, are never
/// skipped.
///
/// # Arguments
///
/// * `path` - The path to check
///
/// # Returns
///
/// `true` if the file should be skipped, `false` otherwise
fn should_skip_file(path: &Path) -> bool {
    /// Extensions (lowercase, without the leading dot) treated as binary.
    const BINARY_EXTENSIONS: &[&str] = &[
        // Executable files
        "exe", "dll", "so", "dylib",
        // Compressed files
        "zip", "rar", "7z", "gz", "tar",
        // Image files
        "jpg", "jpeg", "png", "gif", "bmp", "ico",
        // Audio/Video files
        "mp3", "mp4", "avi", "mov", "wav",
        // Document formats
        "pdf", "doc", "docx", "xls", "xlsx",
        // Other binary formats
        "bin", "dat",
    ];

    path.extension()
        .and_then(|raw_ext| raw_ext.to_str())
        .map_or(false, |ext_text| {
            let lowered = ext_text.to_lowercase();
            BINARY_EXTENSIONS.contains(&lowered.as_str())
        })
}
49
50/// Scans a directory for non-UTF-8 encoded files and optionally converts them to UTF-8
51///
52/// # Arguments
53///
54/// * `dir_path` - The directory path to scan
55/// * `auto_convert` - Whether to automatically convert files to UTF-8
56/// * `verbose` - Whether to show detailed encoding information
57///
58/// # Returns
59///
60/// Returns `Ok(())` if the operation was successful, or an error if something went wrong
61///
62/// # Example
63///
64/// ```no_run
65/// use file_kitty::encoding::scan_directory;
66///
67/// #[tokio::main]
68/// async fn main() -> anyhow::Result<()> {
69/// scan_directory("./my_project", false, false).await?;
70/// Ok(())
71/// }
72/// ```
73pub async fn scan_directory(dir_path: &str, auto_convert: bool, verbose: bool) -> Result<()> {
74 let mut found_non_utf8 = false;
75
76 // First scan and display all non-UTF-8 files
77 for entry in WalkDir::new(dir_path).into_iter().filter_map(|e| e.ok()) {
78 if entry.file_type().is_file() {
79 let path = entry.path();
80 if should_skip_file(path) {
81 if verbose {
82 println!("Skipping binary file: {}", path.display());
83 }
84 continue;
85 }
86
87 if let Some(encoding_info) = detect_file_encoding(path).await? {
88 if encoding_info.0 != "UTF-8" {
89 found_non_utf8 = true;
90 if verbose {
91 let content = fs::read(path).await?;
92 println!(
93 "File: {}\nEncoding: {}\nSize: {} bytes\n",
94 path.display(),
95 encoding_info.0,
96 content.len(),
97 );
98 } else {
99 println!(
100 "Find: {} encoding: {}",
101 path.display(),
102 encoding_info.0
103 );
104 }
105 }
106 }
107 }
108 }
109
110 if !found_non_utf8 {
111 println!("No non-UTF-8 encoded files found");
112 return Ok(());
113 }
114
115 let should_convert = if auto_convert {
116 true
117 } else {
118 // Ask user if they want to process these files
119 print!("\nDo you want to convert the above files to UTF-8 encoding? (y/n): ");
120 io::stdout().flush()?;
121
122 let mut input = String::new();
123 io::stdin().read_line(&mut input)?;
124
125 input.trim().eq_ignore_ascii_case("y")
126 };
127
128 if should_convert {
129 // Iterate again and perform conversion
130 for entry in WalkDir::new(dir_path).into_iter().filter_map(|e| e.ok()) {
131 if entry.file_type().is_file() {
132 let path = entry.path();
133 if should_skip_file(path) {
134 continue;
135 }
136 if let Some(encoding_info) = detect_file_encoding(path).await? {
137 if encoding_info.0 != "UTF-8" {
138 convert_to_utf8(path, encoding_info.1).await?;
139 println!("File {} converted to UTF-8 encoding", path.display());
140 }
141 }
142 }
143 }
144 }
145
146 Ok(())
147}
148
149/// Detects the encoding of a file
150///
151/// # Arguments
152///
153/// * `path` - Path to the file to check
154///
155/// # Returns
156///
157/// Returns `Some((encoding_name, encoding))` if non-UTF-8, `None` if UTF-8
158async fn detect_file_encoding(path: &Path) -> Result<Option<(&'static str, &'static Encoding)>> {
159 let content = fs::read(path).await?;
160 let mut detector = EncodingDetector::new();
161 detector.feed(&content, true);
162 let encoding = detector.guess(None, true);
163
164 if encoding.name() != "UTF-8" {
165 Ok(Some((encoding.name(), encoding)))
166 } else {
167 Ok(None)
168 }
169}
170
171/// Converts a file to UTF-8 encoding
172///
173/// # Arguments
174///
175/// * `path` - Path to the file to convert
176/// * `encoding` - The current encoding of the file
177///
178/// # Returns
179///
180/// Returns `Ok(())` if conversion was successful, or an error if something went wrong
181async fn convert_to_utf8(path: &Path, encoding: &'static Encoding) -> Result<()> {
182 let content = fs::read(path).await?;
183 let (decoded, _, had_errors) = encoding.decode(&content);
184 if had_errors {
185 println!("Warning: Error occurred during decoding");
186 }
187
188 fs::write(path, decoded.as_bytes()).await?;
189 Ok(())
190}