use anyhow::{Context, Result};
use flate2::read::GzDecoder;
use std::fs::File;
use std::io::{BufRead, BufReader, Read, Seek};
use std::path::Path;
const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];
pub fn open_reader<P: AsRef<Path>>(path: P) -> Result<Box<dyn BufRead>> {
let path = path.as_ref();
let mut file =
File::open(path).with_context(|| format!("Failed to open file: {}", path.display()))?;
let mut magic = [0u8; 2];
let bytes_read = file
.read(&mut magic)
.with_context(|| format!("Failed to read from file: {}", path.display()))?;
file.seek(std::io::SeekFrom::Start(0))
.with_context(|| format!("Failed to seek in file: {}", path.display()))?;
if bytes_read >= 2 && magic == GZIP_MAGIC {
log::debug!("Detected gzip compression: {}", path.display());
let decoder = GzDecoder::new(file);
Ok(Box::new(BufReader::new(decoder)))
} else {
Ok(Box::new(BufReader::new(file)))
}
}
/// 64-bit FNV-1a offset basis: the initial state for a fresh hash, and the
/// value to seed a streaming hash with before calling [`fnv1a_update`].
pub const FNV1A_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;

/// 64-bit FNV prime that the state is multiplied by after each byte.
const FNV1A_PRIME: u64 = 0x0000_0100_0000_01b3;

/// Hashes `bytes` in one shot with 64-bit FNV-1a.
///
/// Equivalent to seeding a state with [`FNV1A_OFFSET`] and feeding all of
/// `bytes` through [`fnv1a_update`].
#[inline(always)]
pub fn fnv1a(bytes: &[u8]) -> u64 {
    let mut state = FNV1A_OFFSET;
    fnv1a_update(&mut state, bytes);
    state
}

/// Feeds `bytes` into an in-progress FNV-1a state held in `hash`.
///
/// Each byte is XOR-ed into the state, which is then multiplied by the FNV
/// prime with wrapping arithmetic. Calling this repeatedly on consecutive
/// chunks gives the same result as hashing their concatenation at once.
#[inline(always)]
pub fn fnv1a_update(hash: &mut u64, bytes: &[u8]) {
    *hash = bytes.iter().fold(*hash, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV1A_PRIME)
    });
}
/// Renders `n` in decimal with a comma between every group of three digits,
/// counted from the right (e.g. `1234567` becomes `"1,234,567"`).
///
/// Values with three or fewer digits are returned without any separator.
pub fn format_with_commas(n: u64) -> String {
    let digits = n.to_string();
    // One separator for every complete group of three beyond the leading group.
    let mut grouped = String::with_capacity(digits.len() + (digits.len() - 1) / 3);

    // `rchunks` groups from the right, so only the leading group can be short;
    // reversing restores left-to-right order.
    for (index, group) in digits.as_bytes().rchunks(3).rev().enumerate() {
        if index > 0 {
            grouped.push(',');
        }
        // The decimal rendering of a u64 is pure ASCII, so byte -> char is lossless.
        grouped.extend(group.iter().map(|&digit| char::from(digit)));
    }
    grouped
}
/// Returns the median of `values`, or `0.0` when the slice is empty.
///
/// The input is copied and sorted, so the caller's slice is left untouched.
/// For an even number of elements the result is the mean of the two middle
/// values.
///
/// Values are ordered with [`f64::total_cmp`], which is a genuine total order:
/// positive NaNs sort after every number and negative NaNs before. Inputs
/// containing NaN therefore give a deterministic result (possibly NaN itself)
/// instead of depending on an inconsistent comparison.
pub fn median(values: &[f64]) -> f64 {
    if values.is_empty() {
        return 0.0;
    }

    let mut sorted = values.to_vec();
    // Elements that compare equal under `total_cmp` are bit-identical, so an
    // unstable sort cannot change the outcome.
    sorted.sort_unstable_by(f64::total_cmp);

    let mid = sorted.len() / 2;
    if sorted.len() % 2 == 1 {
        sorted[mid]
    } else {
        (sorted[mid - 1] + sorted[mid]) / 2.0
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;
    use std::path::PathBuf;

    /// Fixture file in the system temp directory, deleted when the guard drops.
    ///
    /// Cleanup runs even when an assertion fails, so test runs do not leave
    /// files behind.
    struct TempFile(PathBuf);

    impl TempFile {
        /// Reserves a path for `name`, prefixed with the process id so that
        /// concurrent test runs sharing a temp directory do not collide.
        fn new(name: &str) -> Self {
            let unique = format!("{}_{}", std::process::id(), name);
            Self(std::env::temp_dir().join(unique))
        }

        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best effort: the file may never have been created.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    /// Writes `content` uncompressed to a temporary fixture file.
    fn write_temp_plain(name: &str, content: &str) -> TempFile {
        let fixture = TempFile::new(name);
        std::fs::write(fixture.path(), content).unwrap();
        fixture
    }

    /// Writes `content` gzip-compressed to a temporary fixture file.
    fn write_temp_gz(name: &str, content: &str) -> TempFile {
        let fixture = TempFile::new(name);
        let file = File::create(fixture.path()).unwrap();
        let mut encoder = GzEncoder::new(file, Compression::default());
        encoder.write_all(content.as_bytes()).unwrap();
        encoder.finish().unwrap();
        fixture
    }

    /// Reads every line of `path` through `open_reader`.
    fn read_lines(path: &Path) -> Vec<String> {
        open_reader(path)
            .unwrap()
            .lines()
            .map(|line| line.unwrap())
            .collect()
    }

    #[test]
    fn test_fnv1a_deterministic() {
        let hash1 = fnv1a(b"test_read_name");
        let hash2 = fnv1a(b"test_read_name");
        assert_eq!(hash1, hash2);
        let hash3 = fnv1a(b"other_read_name");
        assert_ne!(hash1, hash3);
    }

    #[test]
    fn test_fnv1a_known_vectors() {
        assert_eq!(fnv1a(b""), FNV1A_OFFSET);
        assert_eq!(fnv1a(b"a"), 0xaf63_dc4c_8601_ec8c);
    }

    #[test]
    fn test_fnv1a_streaming_matches_oneshot() {
        let oneshot = fnv1a(b"hello world");
        let mut streaming = FNV1A_OFFSET;
        fnv1a_update(&mut streaming, b"hello ");
        fnv1a_update(&mut streaming, b"world");
        assert_eq!(oneshot, streaming);
    }

    #[test]
    fn test_format_with_commas() {
        assert_eq!(format_with_commas(0), "0");
        assert_eq!(format_with_commas(999), "999");
        assert_eq!(format_with_commas(1000), "1,000");
        assert_eq!(format_with_commas(1234567), "1,234,567");
        assert_eq!(format_with_commas(u64::MAX), "18,446,744,073,709,551,615");
    }

    #[test]
    fn test_median() {
        assert_eq!(median(&[]), 0.0);
        assert_eq!(median(&[5.0]), 5.0);
        assert_eq!(median(&[3.0, 1.0, 2.0]), 2.0);
        assert_eq!(median(&[4.0, 1.0, 3.0, 2.0]), 2.5);
    }

    #[test]
    fn test_open_reader_plain() {
        let content = "line1\nline2\nline3\n";
        let fixture = write_temp_plain("rustqc_test_io_plain.txt", content);
        assert_eq!(read_lines(fixture.path()), vec!["line1", "line2", "line3"]);
    }

    #[test]
    fn test_open_reader_gzip() {
        let content = "line1\nline2\nline3\n";
        let fixture = write_temp_gz("rustqc_test_io_gzip.txt.gz", content);
        assert_eq!(read_lines(fixture.path()), vec!["line1", "line2", "line3"]);
    }

    #[test]
    fn test_open_reader_empty_file() {
        let fixture = write_temp_plain("rustqc_test_io_empty.txt", "");
        assert!(read_lines(fixture.path()).is_empty());
    }

    #[test]
    fn test_open_reader_nonexistent() {
        let result = open_reader("/nonexistent/path/file.txt");
        assert!(result.is_err());
    }
}