use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashMap};
use std::fs::File;
use std::io::{Read, Write};
use std::path::Path;
fn download_if_missing(url: &str, path: &str) -> std::io::Result<()> {
if Path::new(path).exists() {
return Ok(());
}
eprintln!("Downloading {} from GitHub...", path);
let response = ureq::get(url)
.call()
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?;
let mut file = File::create(path)?;
let mut reader = response.into_reader();
std::io::copy(&mut reader, &mut file)?;
eprintln!("Downloaded {} successfully", path);
Ok(())
}
#[derive(Debug)]
struct Node {
freq: f32,
value: Option<char>,
left: Option<Box<Node>>,
right: Option<Box<Node>>,
}
impl Node {
fn leaf(ch: char, freq: f32) -> Self {
Node {
freq,
value: Some(ch),
left: None,
right: None,
}
}
fn internal(left: Node, right: Node) -> Self {
Node {
freq: left.freq + right.freq,
value: None,
left: Some(Box::new(left)),
right: Some(Box::new(right)),
}
}
}
impl Ord for Node {
fn cmp(&self, other: &Self) -> Ordering {
other
.freq
.partial_cmp(&self.freq)
.unwrap_or(Ordering::Equal)
}
}
impl PartialOrd for Node {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Eq for Node {}
impl PartialEq for Node {
fn eq(&self, other: &Self) -> bool {
self.freq == other.freq
}
}
fn build_huffman_tree(frequencies: &[(u32, f32)]) -> Node {
let mut heap = BinaryHeap::new();
for (codepoint, freq) in frequencies {
if let Some(ch) = char::from_u32(*codepoint) {
heap.push(Node::leaf(ch, *freq));
}
}
while heap.len() > 1 {
let left = heap.pop().unwrap();
let right = heap.pop().unwrap();
heap.push(Node::internal(left, right));
}
heap.pop().unwrap()
}
fn generate_codes(tree: &Node) -> HashMap<char, (u32, u8)> {
let mut codes = HashMap::new();
let mut path: Vec<bool> = Vec::new();
fn traverse(node: &Node, path: &mut Vec<bool>, codes: &mut HashMap<char, (u32, u8)>) {
if let Some(ch) = node.value {
let mut bits = 0u32;
let len = path.len();
for (i, &bit) in path.iter().enumerate() {
if bit {
bits |= 1 << (len - 1 - i); }
}
codes.insert(ch, (bits, path.len() as u8));
} else {
if let Some(ref left) = node.left {
path.push(false);
traverse(left, path, codes);
path.pop();
}
if let Some(ref right) = node.right {
path.push(true);
traverse(right, path, codes);
path.pop();
}
}
}
traverse(tree, &mut path, &mut codes);
codes
}
fn main() {
println!("cargo::rustc-check-cfg=cfg(huffman_available)");
println!("cargo:rerun-if-changed=frequencies.bin");
if let (Ok(out_meta), Ok(in_meta)) = (
std::fs::metadata("huffman_codes.bin"),
std::fs::metadata("frequencies.bin"),
) {
if let (Ok(out_time), Ok(in_time)) = (out_meta.modified(), in_meta.modified()) {
if out_time > in_time {
println!("cargo:rustc-cfg=huffman_available");
return;
}
}
}
const FREQUENCIES_URL: &str = "https://github.com/nickspiker/vsf/raw/main/frequencies.bin";
let download_result = download_if_missing(FREQUENCIES_URL, "frequencies.bin");
let mut file = match File::open("frequencies.bin") {
Ok(f) => f,
Err(_) => {
eprintln!("Warning: frequencies.bin not available, skipping Huffman table generation");
eprintln!("Huffman text compression will not be available in this build.");
if download_result.is_err() {
eprintln!(
"Download failed (likely building in restricted environment like docs.rs)"
);
}
return; }
};
let mut bytes = Vec::new();
file.read_to_end(&mut bytes).unwrap();
if &bytes[0..4] != b"FREQ" {
panic!("Invalid frequency file format");
}
let count = u32::from_le_bytes(bytes[8..12].try_into().unwrap()) as usize;
let mut frequencies = Vec::new();
for i in 0..count {
let offset = 12 + i * 8;
let codepoint = u32::from_le_bytes(bytes[offset..offset + 4].try_into().unwrap());
let frequency = f32::from_le_bytes(bytes[offset + 4..offset + 8].try_into().unwrap());
frequencies.push((codepoint, frequency));
}
println!("cargo:warning=Loaded {} character frequencies", count);
let tree = build_huffman_tree(&frequencies);
let codes = generate_codes(&tree);
println!("cargo:warning=Generated {} Huffman codes", codes.len());
let mut output = File::create("huffman_codes.bin").unwrap();
output.write_all(b"HUFF").unwrap(); output.write_all(&1u32.to_le_bytes()).unwrap(); output
.write_all(&(codes.len() as u32).to_le_bytes())
.unwrap();
let mut sorted_codes: Vec<_> = codes.iter().collect();
sorted_codes.sort_by_key(|(ch, _)| **ch as u32);
for (ch, (bits, length)) in sorted_codes {
let codepoint = *ch as u32;
let packed = bits | ((*length as u32) << 24);
output.write_all(&codepoint.to_le_bytes()).unwrap();
output.write_all(&packed.to_le_bytes()).unwrap();
}
println!(
"cargo:warning=Generated huffman_codes.bin: {} entries",
codes.len()
);
println!("cargo:rustc-cfg=huffman_available");
}