pcompress/
encode.rs

1use std::io::{BufRead, BufWriter, Read, Write};
2
3use super::diff::Diff;
4
5pub fn encode<R: Read, W: Write>(
6    reader: &mut std::io::BufReader<R>,
7    mut writer: &mut std::io::BufWriter<W>,
8    extreme: bool,
9) {
10    let mut prev_mapping: Vec<usize> = Vec::new();
11    let mut delta: Diff = Diff::new();
12    let mut alt_delta: Diff = Diff::new(); // unused if extreme is false
13
14    let mut line = String::new();
15    loop {
16        let bytes = reader.read_line(&mut line).unwrap();
17        if bytes == 0 {
18            // EOF; reset
19            break;
20        }
21        let mut mapping: Vec<usize> =
22            serde_json::from_str(line.trim()).expect("Could not read input.");
23        let (mut delta, _written) = compute_diff(&prev_mapping, &mapping, &mut delta);
24
25        // See if we can swap district labels around for better compression
26        if extreme {
27            // assumes only two districts are being swapped
28            let mut counter: usize = 0;
29            let mut first: usize = 0;
30            let mut second: usize = 0;
31
32            for (district, nodes) in delta.diff.iter().enumerate() {
33                if !nodes.is_empty() {
34                    if counter == 0 {
35                        first = district;
36                    } else if counter == 1 {
37                        second = district;
38                    }
39                    counter += 1;
40                }
41            }
42
43            if counter == 2 {
44                // ensure only two districts are being swapped
45                let swapped_mapping: Vec<usize> = mapping
46                    .iter()
47                    .map(|district| {
48                        if *district == first {
49                            return second;
50                        } else if *district == second {
51                            return first;
52                        } else {
53                            return *district;
54                        }
55                    })
56                    .collect::<Vec<usize>>();
57                let (alt_delta, _alt_written) =
58                    compute_diff(&prev_mapping, &swapped_mapping, &mut alt_delta);
59                if alt_delta
60                    .diff
61                    .iter()
62                    .map(|nodes| nodes.len())
63                    .sum::<usize>()
64                    < delta.diff.iter().map(|nodes| nodes.len()).sum::<usize>()
65                {
66                    mapping = swapped_mapping;
67                    delta = alt_delta;
68                }
69            }
70        }
71
72        writer = export_diff(writer, delta);
73        prev_mapping = mapping;
74
75        line.clear();
76    }
77
78    writer.flush().unwrap();
79}
80
81pub fn compute_diff<'a>(
82    prev_mapping: &[usize],
83    new_mapping: &[usize],
84    delta: &'a mut Diff,
85) -> (&'a Diff, bool) {
86    delta.reset();
87
88    let mut written = false;
89    for (node, district) in new_mapping.iter().enumerate() {
90        if node >= prev_mapping.len() || prev_mapping[node] != *district {
91            // difference detected
92            written = true;
93            delta.add(*district, node);
94        }
95    }
96    (delta, written)
97}
98
99pub fn export_diff<'a, W: std::io::Write>(
100    writer: &'a mut BufWriter<W>,
101    delta: &Diff,
102) -> &'a mut BufWriter<W> {
103    // Exports diff to custom binary representation
104    let mut first = true;
105
106    let mut skipped_districts: u8 = 0;
107    for (_district, nodes) in delta.diff.iter().enumerate() {
108        if nodes.is_empty() {
109            skipped_districts += 1;
110        } else {
111            // if skipped_districts > 0 { // need to write skipped district marker
112            // }
113            if !first {
114                writer.write_all(&(u16::MAX - 1).to_be_bytes()).unwrap(); // write district marker (16)
115                writer.write_all(&skipped_districts.to_be_bytes()).unwrap(); // write number of skipped district(s) (8)
116            }
117
118            for node in nodes {
119                // TODO: sort
120                writer.write_all(&(*node as u16).to_be_bytes()).unwrap();
121                // write node (16)
122            }
123            skipped_districts = 1;
124        }
125        first = false;
126    }
127    writer.write_all(&u16::MAX.to_be_bytes()).unwrap(); // write district marker (16)
128                                                        // write end of diff marker (16)
129    writer
130}