// file_kitty/encoding.rs

//! Module for file encoding detection and conversion
//! 
//! This module provides functionality to:
//! - Detect file encodings
//! - Convert files to UTF-8
//! - Skip binary files automatically
//! - Process directories recursively

use anyhow::Result;
use chardetng::EncodingDetector;
use encoding_rs::Encoding;
use std::path::Path;
use tokio::fs;
use walkdir::WalkDir;
use std::io::{self, Write};

/// Decides whether a file is excluded from encoding detection based on its extension.
///
/// The extension is lower-cased and compared against a fixed list of well-known
/// binary formats (executables, archives, images, audio/video, office documents
/// and generic binary blobs). The file contents are never inspected.
///
/// # Arguments
///
/// * `path` - The path to check
///
/// # Returns
///
/// `true` if the extension is on the binary list, `false` otherwise — including
/// when the path has no extension or the extension is not valid UTF-8.
fn should_skip_file(path: &Path) -> bool {
    const SKIPPED_EXTENSIONS: &[&str] = &[
        // Executable files
        "exe", "dll", "so", "dylib",
        // Compressed files
        "zip", "rar", "7z", "gz", "tar",
        // Image files
        "jpg", "jpeg", "png", "gif", "bmp", "ico",
        // Audio/Video files
        "mp3", "mp4", "avi", "mov", "wav",
        // Document formats
        "pdf", "doc", "docx", "xls", "xlsx",
        // Other binary formats
        "bin", "dat",
    ];

    path.extension()
        .and_then(|raw| raw.to_str())
        .map(|text| text.to_lowercase())
        .map_or(false, |lowered| SKIPPED_EXTENSIONS.contains(&lowered.as_str()))
}

/// Scans a directory for non-UTF-8 encoded files and optionally converts them to UTF-8
///
/// The directory is walked recursively exactly once. Every regular file that is
/// not skipped by extension is run through encoding detection; each file reported
/// as non-UTF-8 is printed and remembered. If conversion is requested (either via
/// `auto_convert` or by answering `y` at the prompt), exactly the files that were
/// listed are converted, using the encoding detected during the scan.
///
/// # Arguments
///
/// * `dir_path` - The directory path to scan
/// * `auto_convert` - Whether to automatically convert files to UTF-8
/// * `verbose` - Whether to show detailed encoding information
///
/// # Returns
///
/// Returns `Ok(())` if the operation was successful, or an error if something went wrong
///
/// # Errors
///
/// Returns an error if a file cannot be read, inspected or written, or if the
/// interactive prompt cannot be written to stdout / read from stdin. Directory
/// entries that cannot be traversed are reported on stderr and skipped.
///
/// # Example
///
/// ```no_run
/// use file_kitty::encoding::scan_directory;
///
/// #[tokio::main]
/// async fn main() -> anyhow::Result<()> {
///     scan_directory("./my_project", false, false).await?;
///     Ok(())
/// }
/// ```
pub async fn scan_directory(dir_path: &str, auto_convert: bool, verbose: bool) -> Result<()> {
    // Files found to be non-UTF-8, together with their detected encoding.
    // Remembering them guarantees that the conversion step touches exactly the
    // files shown to the user and avoids walking/detecting everything twice.
    let mut pending = Vec::new();

    for entry in WalkDir::new(dir_path) {
        let entry = match entry {
            Ok(entry) => entry,
            Err(err) => {
                // Best effort: keep scanning, but do not hide the failure.
                eprintln!("Warning: skipping unreadable entry: {}", err);
                continue;
            }
        };

        if !entry.file_type().is_file() {
            continue;
        }

        let path = entry.path();
        if should_skip_file(path) {
            if verbose {
                println!("Skipping binary file: {}", path.display());
            }
            continue;
        }

        // `detect_file_encoding` yields `None` for UTF-8, so any `Some` is a hit.
        if let Some((encoding_name, encoding)) = detect_file_encoding(path).await? {
            if verbose {
                // Ask the filesystem for the size instead of re-reading the file.
                let size = fs::metadata(path).await?.len();
                println!(
                    "File: {}\nEncoding: {}\nSize: {} bytes\n",
                    path.display(),
                    encoding_name,
                    size,
                );
            } else {
                println!(
                    "Find: {} encoding: {}",
                    path.display(),
                    encoding_name
                );
            }
            pending.push((entry.into_path(), encoding));
        }
    }

    if pending.is_empty() {
        println!("No non-UTF-8 encoded files found");
        return Ok(());
    }

    let should_convert = if auto_convert {
        true
    } else {
        // Ask user if they want to process these files
        print!("\nDo you want to convert the above files to UTF-8 encoding? (y/n): ");
        io::stdout().flush()?;

        let mut input = String::new();
        io::stdin().read_line(&mut input)?;

        input.trim().eq_ignore_ascii_case("y")
    };

    if should_convert {
        for (path, encoding) in pending {
            convert_to_utf8(&path, encoding).await?;
            println!("File {} converted to UTF-8 encoding", path.display());
        }
    }

    Ok(())
}

/// Guesses the character encoding of a file from its contents
///
/// The whole file is read into memory and handed to a `chardetng`
/// `EncodingDetector` in a single `feed` call marked as the last chunk. The
/// guess is made without a top-level-domain hint and with UTF-8 allowed as a
/// possible answer.
///
/// # Arguments
///
/// * `path` - Path to the file to check
///
/// # Returns
///
/// Returns `Some((encoding_name, encoding))` when the guessed encoding is not
/// UTF-8, and `None` when it is UTF-8. Fails if the file cannot be read.
async fn detect_file_encoding(path: &Path) -> Result<Option<(&'static str, &'static Encoding)>> {
    let bytes = fs::read(path).await?;

    let mut sniffer = EncodingDetector::new();
    sniffer.feed(&bytes, true);
    let guessed = sniffer.guess(None, true);

    match guessed.name() {
        "UTF-8" => Ok(None),
        label => Ok(Some((label, guessed))),
    }
}

/// Converts a file to UTF-8 encoding, overwriting it in place
///
/// The file is read fully, decoded with `encoding` via `Encoding::decode`, and
/// the resulting UTF-8 text is written back to the same path. If the decoder
/// reports errors, a warning naming the file is printed and the (lossy) result
/// is still written — conversion is best effort.
///
/// # Arguments
///
/// * `path` - Path to the file to convert
/// * `encoding` - The current encoding of the file
///
/// # Returns
///
/// Returns `Ok(())` if conversion was successful, or an error if the file could
/// not be read or written
async fn convert_to_utf8(path: &Path, encoding: &'static Encoding) -> Result<()> {
    let content = fs::read(path).await?;

    // The middle tuple element is the encoding actually used by the decoder;
    // it is not needed here.
    let (decoded, _, had_errors) = encoding.decode(&content);
    if had_errors {
        // Name the file: during a recursive scan an anonymous warning cannot be
        // traced back to the input that triggered it.
        println!(
            "Warning: Error occurred during decoding of {}; malformed sequences were replaced",
            path.display()
        );
    }

    fs::write(path, decoded.as_bytes()).await?;
    Ok(())
}