1use anyhow::Result;
10use chardetng::EncodingDetector;
11use encoding_rs::Encoding;
12use std::path::Path;
13use tokio::fs;
14use walkdir::WalkDir;
15use std::io::{self, Write};
16
/// Decides whether `path` should be processed, based on its file extension.
///
/// The extension of `path` is compared ASCII-case-insensitively against each
/// entry of `allowed_extensions`. Entries may be written with or without a
/// leading dot (`"rs"` and `".rs"` are equivalent), because
/// [`Path::extension`] never includes the dot and a dotted entry could
/// otherwise never match.
///
/// Returns `false` when the path has no extension or the extension is not
/// valid UTF-8.
fn should_process_file(path: &Path, allowed_extensions: &[String]) -> bool {
    let ext = match path.extension().and_then(|raw| raw.to_str()) {
        Some(ext) => ext,
        None => return false,
    };

    allowed_extensions.iter().any(|allowed| {
        // Tolerate user-supplied entries such as ".rs".
        let bare = allowed.strip_prefix('.').unwrap_or(allowed.as_str());
        bare.eq_ignore_ascii_case(ext)
    })
}
35
/// Builds the extension allow-list used when the caller supplies none.
///
/// Entries are lowercase and carry no leading dot.
fn default_allowed_extensions() -> Vec<String> {
    const DEFAULTS: &[&str] = &[
        // C / C++
        "c", "h", "cpp", "hpp", "cc", "cxx",
        // Rust, TypeScript, JavaScript
        "rs", "ts", "tsx", "js", "jsx",
        // Plain text and markup
        "txt", "html", "htm", "xml",
        // Other programming languages
        "py", "java", "go",
        // Documentation
        "md", "markdown",
        // Data and configuration
        "json", "yaml", "yml", "toml",
        // Shell, SQL, stylesheets
        "sh", "bash", "sql", "css", "scss", "sass",
    ];

    let mut extensions = Vec::with_capacity(DEFAULTS.len());
    for ext in DEFAULTS {
        extensions.push(ext.to_string());
    }
    extensions
}
57
58pub async fn scan_directory(dir_path: &str, convert: bool, verbose: bool, types: Option<Vec<String>>) -> Result<()> {
83 let allowed_extensions = types.unwrap_or_else(default_allowed_extensions);
84 let mut found_non_utf8 = false;
85
86 for entry in WalkDir::new(dir_path).into_iter().filter_map(|e| e.ok()) {
88 if entry.file_type().is_file() {
89 let path = entry.path();
90 if !should_process_file(path, &allowed_extensions) {
91 if verbose {
92 println!("Skip: {}", path.display());
93 }
94 continue;
95 }
96
97 if let Some(encoding_info) = detect_file_encoding(path).await? {
98 if encoding_info.0 != "UTF-8" {
99 found_non_utf8 = true;
100 if verbose {
101 let content = fs::read(path).await?;
102 println!(
103 "File: {}\nEncoding: {}\nSize: {} bytes\n",
104 path.display(),
105 encoding_info.0,
106 content.len(),
107 );
108 } else {
109 println!("{} {}", path.display(), encoding_info.0);
110 }
111 }
112 }
113 }
114 }
115
116 if !found_non_utf8 {
117 return Ok(());
118 }
119
120 let should_convert = if convert {
121 true
122 } else {
123 print!("\nConvert to UTF-8? (y/n): ");
124 io::stdout().flush()?;
125
126 let mut input = String::new();
127 io::stdin().read_line(&mut input)?;
128
129 input.trim().eq_ignore_ascii_case("y")
130 };
131
132 if should_convert {
133 for entry in WalkDir::new(dir_path).into_iter().filter_map(|e| e.ok()) {
135 if entry.file_type().is_file() {
136 let path = entry.path();
137 if !should_process_file(path, &allowed_extensions) {
138 continue;
139 }
140 if let Some(encoding_info) = detect_file_encoding(path).await? {
141 if encoding_info.0 != "UTF-8" {
142 convert_to_utf8(path, encoding_info.1).await?;
143 println!("Converted: {}", path.display());
144 }
145 }
146 }
147 }
148 }
149
150 Ok(())
151}
152
153async fn detect_file_encoding(path: &Path) -> Result<Option<(&'static str, &'static Encoding)>> {
163 let content = fs::read(path).await?;
164 let mut detector = EncodingDetector::new();
165 detector.feed(&content, true);
166 let encoding = detector.guess(None, true);
167
168 if encoding.name() != "UTF-8" {
169 Ok(Some((encoding.name(), encoding)))
170 } else {
171 Ok(None)
172 }
173}
174
175async fn convert_to_utf8(path: &Path, encoding: &'static Encoding) -> Result<()> {
186 let content = fs::read(path).await?;
187 let (decoded, _, had_errors) = encoding.decode(&content);
188 if had_errors {
189 println!("Warning: decoding error in {}", path.display());
190 }
191
192 fs::write(path, decoded.as_bytes()).await?;
193 Ok(())
194}