use super::ToolResult;
use std::io::Read;
/// Outcome of checking a byte buffer for UTF-8 well-formedness.
///
/// Derives `PartialEq`/`Eq` so results can be compared directly, e.g. in
/// `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationResult {
    /// `true` when the checked bytes decode as UTF-8 without error.
    pub is_valid: bool,
    /// Number of invalid byte sequences reported by validation; `0` for
    /// valid input.
    pub invalid_byte_count: usize,
}
/// Loads the entire contents of the file at `path` into memory.
///
/// # Errors
///
/// Yields an error built from a message naming `path` when the file cannot
/// be opened ("cannot open file ...") or cannot be read to the end
/// ("cannot read file ...").
fn read_bytes_from_file(path: &str) -> ToolResult<Vec<u8>> {
    let mut contents = Vec::new();
    // Open and read in a single chain; the file handle is a temporary that
    // is closed as soon as the statement finishes.
    std::fs::File::open(path)
        .map_err(|e| format!("cannot open file {path:?}: {e}"))?
        .read_to_end(&mut contents)
        .map_err(|e| format!("cannot read file {path:?}: {e}"))?;
    Ok(contents)
}
/// Checks whether `bytes` is well-formed UTF-8 and counts malformed sequences.
///
/// Returns a [`ValidationResult`] with `is_valid == true` and a count of zero
/// when the whole slice decodes as UTF-8. Otherwise `invalid_byte_count` is
/// the number of invalid byte sequences, where one sequence is the span that
/// lossy decoding would replace with a single U+FFFD.
pub fn validate_bytes(bytes: &[u8]) -> ValidationResult {
    let mut remaining = bytes;
    let mut invalid_sequences = 0usize;

    // Walk the decoder's own error positions instead of counting U+FFFD in a
    // lossy copy: a U+FFFD that is already present in the input is valid
    // UTF-8 and must not be reported as damage.
    while let Err(err) = std::str::from_utf8(remaining) {
        invalid_sequences += 1;
        match err.error_len() {
            // Skip the valid prefix plus the offending bytes, then rescan.
            Some(len) => remaining = &remaining[err.valid_up_to() + len..],
            // The input ends in the middle of a character; nothing follows.
            None => break,
        }
    }

    ValidationResult {
        is_valid: invalid_sequences == 0,
        invalid_byte_count: invalid_sequences,
    }
}
/// Produces an owned string from `bytes`, substituting U+FFFD for every
/// sequence that is not valid UTF-8.
pub fn fix_bytes(bytes: &[u8]) -> String {
    match String::from_utf8_lossy(bytes) {
        // Input was already valid: copy the borrowed text.
        std::borrow::Cow::Borrowed(text) => text.to_owned(),
        // Replacements were made: the repaired string is already owned.
        std::borrow::Cow::Owned(repaired) => repaired,
    }
}
/// Entry point of the tool: obtains bytes, then validates and/or repairs them.
///
/// * `_input` - a file path when `_file` is set, otherwise the text itself.
/// * `_file` - read the bytes from the file named by `_input`.
/// * `_validate` - print whether the bytes are valid UTF-8, or how many
///   invalid sequences were found.
/// * `_fix` - print the bytes decoded with invalid sequences replaced.
///
/// With neither `_validate` nor `_fix` set, the lossily decoded input is
/// printed as is. All output goes to stdout.
///
/// # Errors
///
/// Propagates the error from reading the file when `_file` is set.
pub async fn run(_input: String, _file: bool, _validate: bool, _fix: bool) -> ToolResult {
    let payload: Vec<u8> = if _file {
        read_bytes_from_file(&_input)?
    } else {
        _input.into_bytes()
    };

    // Default mode: no action requested, so just echo the decoded input.
    if !(_validate || _fix) {
        println!("{}", String::from_utf8_lossy(&payload));
        return Ok(());
    }

    if _validate {
        let report = validate_bytes(&payload);
        if !report.is_valid {
            println!(
                "Invalid UTF-8: {} invalid byte sequence(s) found",
                report.invalid_byte_count
            );
        } else {
            println!("Valid UTF-8");
        }
    }

    if _fix {
        println!("{}", fix_bytes(&payload));
    }

    Ok(())
}
/// Unit tests for the validation/repair helpers and the `run` entry point.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_validate_valid_utf8() {
        let result = validate_bytes("hello, world!".as_bytes());
        assert!(result.is_valid);
        assert_eq!(result.invalid_byte_count, 0);
    }

    #[test]
    fn test_validate_invalid_utf8() {
        let bytes: Vec<u8> = vec![b'h', b'i', 0xFF];
        let result = validate_bytes(&bytes);
        assert!(!result.is_valid);
        // Exactly one stray byte was injected, so pin the exact count.
        assert_eq!(result.invalid_byte_count, 1);
    }

    #[test]
    fn test_fix_bytes_valid_input() {
        let fixed = fix_bytes("good".as_bytes());
        assert_eq!(fixed, "good");
    }

    #[test]
    fn test_fix_bytes_replaces_invalid() {
        let bytes: Vec<u8> = vec![b'a', 0xFF, b'b'];
        let fixed = fix_bytes(&bytes);
        assert_eq!(fixed, "a\u{FFFD}b");
    }

    #[tokio::test]
    async fn test_run_validate_valid() {
        run("hello".to_string(), false, true, false)
            .await
            .expect("should succeed");
    }

    #[tokio::test]
    async fn test_run_fix_mode() {
        run("good string".to_string(), false, false, true)
            .await
            .expect("should succeed");
    }

    #[tokio::test]
    async fn test_run_default_mode() {
        run("plain".to_string(), false, false, false)
            .await
            .expect("should succeed");
    }

    #[tokio::test]
    async fn test_run_from_file() {
        // Per-process file name so concurrent test runs that share the temp
        // directory cannot clobber each other's fixture.
        let path =
            std::env::temp_dir().join(format!("oxirs_utf8_test_{}.txt", std::process::id()));
        std::fs::write(&path, b"file content").expect("write temp file");

        let path_str = path.to_string_lossy().into_owned();
        let outcome = run(path_str, true, true, false).await;

        // Clean up before asserting so a failing run does not leak the file.
        std::fs::remove_file(&path).ok();
        outcome.expect("should read file successfully");
    }

    #[tokio::test]
    async fn test_run_missing_file_returns_error() {
        let missing = std::env::temp_dir()
            .join(format!("oxirs_utf8_nonexistent_{}.txt", std::process::id()))
            .to_string_lossy()
            .into_owned();
        let result = run(missing, true, true, false).await;
        assert!(result.is_err());
    }
}