palmdoc_compression/
palmdoc.rs1use crate::{
2 hashtable::HashTable,
3 window::{Window, MAX_MATCH_LEN},
4};
5use thiserror::Error;
6
7pub fn compress(data: &[u8]) -> Vec<u8> {
8 let mut out = Vec::with_capacity(data.len());
9
10 let mut window = Window::new();
11 let mut table = HashTable::new();
12
13 let mut offset = 0;
14 while offset < data.len() {
15 let remainder = &data[offset..];
16 if remainder.len() > 3 {
17 let hash = table.hash(&remainder[..3]);
18 table.insert(hash, window.position as u16);
19 if let Some((distance, length)) = table.reference(hash, remainder, &window, offset) {
20 if MAX_MATCH_LEN < offset && offset < data.len() - MAX_MATCH_LEN {
22 let m = distance as u16;
23 let code = 0x8000 + ((m << 3) & 0x3ff8) + ((length as u16) - 3);
24 out.extend(&code.to_be_bytes());
25
26 for _ in 0..length {
27 if offset + 3 < data.len() {
28 let hash = table.hash(&data[offset..offset + 3]);
29 table.insert(hash, window.position as u16);
30 }
31 window.push(data[offset]);
32 offset += 1;
33 }
34
35 continue;
36 }
37 }
38 }
39
40 let byte = data[offset];
42 offset += 1;
43 window.push(byte);
44
45 if byte == b' ' && offset + 1 < data.len() && (0x40..0x80).contains(&data[offset]) {
46 out.push(data[offset] ^ 0x80);
47
48 if offset + 3 < data.len() {
49 table.insert(table.hash(&data[offset..offset + 3]), offset as u16);
50 }
51
52 window.push(data[offset]);
53 offset += 1;
54 continue;
55 }
56
57 if byte == 0 || (byte > 8 && byte < 0x80) {
58 out.push(byte);
59 } else {
60 let mut j = offset;
61 let mut binseq = Vec::with_capacity(8);
62 binseq.push(byte);
63
64 while j < data.len() && binseq.len() < 8 {
65 let ch = data[j];
66 if ch == 0 || (ch > 8 && ch < 0x80) {
67 break;
68 }
69
70 binseq.push(ch);
71
72 if j + 3 < data.len() {
73 table.insert(table.hash(&data[j..j + 3]), j as u16);
74 }
75 window.push(ch);
76
77 j += 1;
78 }
79
80 out.extend(&(binseq.len() as u8).to_be_bytes());
81 out.extend(&binseq);
82 offset += binseq.len() - 1;
83 }
84 }
85
86 out
87}
88
89#[derive(Error, Debug)]
90pub enum DecompressError {
91 #[error("offset to LZ77 bits is outside of the data")]
92 OffsetOutsideData,
93 #[error("LZ77 decompression offset is invalid")]
94 InvalidOffset,
95 #[error("not enough data")]
96 NotEnoughData,
97}
98
99pub fn decompress(data: &[u8]) -> Result<Vec<u8>, DecompressError> {
100 let len = data.len();
102 let mut offset = 0;
103 let mut uncompressed = Vec::new();
104
105 while offset < len {
106 let byte = data[offset];
107 offset += 1;
108
109 if (1..=8).contains(&byte) {
110 if offset + byte as usize > len {
112 return Err(DecompressError::NotEnoughData);
113 }
114
115 uncompressed.extend_from_slice(&data[offset..(offset + byte as usize)]);
116 offset += byte as usize;
117 } else if byte < 128 {
118 uncompressed.push(byte);
120 } else if byte >= 192 {
121 uncompressed.push(b' ');
123 uncompressed.push(byte ^ 0x80);
124 } else if offset < len {
125 offset += 1;
127
128 if offset > len {
129 return Err(DecompressError::OffsetOutsideData);
130 }
131
132 let mut lz77 = u16::from_be_bytes([data[offset - 2], data[offset - 1]]);
133
134 lz77 &= 0x3fff;
136
137 let lz77length = ((lz77 & 0x0007) as usize) + 3;
139
140 let lz77offset = (lz77 >> 3) as usize;
142
143 if lz77offset == 0 {
145 let empty = vec![0u8; lz77length];
146 uncompressed.extend_from_slice(&empty);
147 continue;
148 }
149
150 let mut textlength = uncompressed.len();
152 for _ in 0..lz77length {
153 if let Some(textpos) = textlength.checked_sub(lz77offset) {
154 uncompressed.push(uncompressed[textpos]);
155 textlength += 1;
156 } else {
157 return Err(DecompressError::InvalidOffset);
158 }
159 }
160 }
161 }
162
163 Ok(uncompressed)
164}
165
166#[cfg(test)]
167mod tests {
168 use lipsum::lipsum;
169 use pretty_assertions::assert_eq;
170
171 use super::*;
172
173 fn get_calibre_testcases() -> Vec<(Vec<u8>, Vec<u8>)> {
174 return vec![
177 (
178 hex::decode("616263030405066d73").unwrap(),
179 hex::decode("61626304030405066d73").unwrap(),
180 ),
181 (
182 hex::decode("612062206320fe6420").unwrap(),
183 hex::decode("61e2e32001fe6420").unwrap(),
184 ),
185 (
186 hex::decode("303132333435363738396178797a326278797a3263646667666f39697579657268")
187 .unwrap(),
188 hex::decode("303132333435363738396178797a3262802963646667666f39697579657268")
189 .unwrap(),
190 ),
191 (
192 hex::decode("30313233343536373839617364303132333435363738396173647c79797a7a7878666668686a6a6b6b").unwrap(),
193 hex::decode("30313233343536373839617364806f80687c79797a7a7878666668686a6a6b6b").unwrap()
194 ),
195 (
196 hex::decode("6369657761636e6171206569753734332072373837712030772520203b207361206664ef0c6664786f73616320776f636a702061636f6965636f776569206f77616963206a6f63696f7761706a636976636a706f69766a706f726569766a706f617663613b207039617738373433793672373425245e245e253820").unwrap(),
197 hex::decode("6369657761636e6171e56975373433f2373837712030772520203bf361e66401ef0c6664786f736163f76f636a70e1636f6965636f776569ef77616963ea6f63698050706a63697681086f697680287265803a617663613bf03961773882f0793672373425245e245e253820").unwrap()
198 ),
199 (
200 hex::decode("61626373646661736466616263646173646f66617373").unwrap(),
201 hex::decode("61626373646661736466616263646173646f66617373").unwrap(),
202 ),
203 ];
204 }
205
206 #[test]
207 fn test_compress_palmdoc() {
208 for (input, expected) in get_calibre_testcases() {
209 let compressed = compress(&input);
210 assert_eq!(compressed, expected);
211 }
212 }
213
214 #[test]
215 fn test_decompress_palmdoc() {
216 for (expected, compressed) in get_calibre_testcases() {
217 let decompressed = decompress(&compressed).unwrap();
218 assert_eq!(decompressed, expected);
219 }
220 }
221
222 #[test]
223 fn test_roundtrip() {
224 let input = lipsum(4096);
225 let input = input.as_bytes()[..4096].to_vec();
226
227 let compressed = compress(&input);
228 let decompressed = decompress(&compressed).unwrap();
229 assert_eq!(input, decompressed);
230 }
231
232 #[test]
233 fn test_null_run_length() {
234 let compressed = hex::decode("61726973f46ff6697302c2ad6974cd61646502c2ad6d6f6902c2ad73656c6c65c7656f7267653bf4686174e174e86572e86f7573658050e3616d65f57002c2ad6f6ec26f6e6102c2ad70617274652cf7686fe16c82206fe56e02c2ad6a6f796564821865e681206d81f8e16302c2ad747265737303e2809980b2766f72732ce16e81418340696ee884d870811802c2ad656e6365ce61706f6c6583506861839070656e82996f81e06c6c817082706fef6e65ef66834c696e7486986e67e669748722776869636885b9776173f3756284c065637483a38099746884c17481fa64756384c873ed657202c2ad63792ed480a86c61829074880873873883f068696d859585586d616702c2ad88216e696d83c881507988af74658393738af071756584d96c79f2807070616964e279e4656174682e3c2f703e0a0909093c70e36c6173733d2263616c69627265313722e169643d22374b344755223e848173746f02c2ad727985ea76658048897085117984aa88a285f96573884b2ce58a607083a063698ce984088774706f8968890065726580827285f076616c89316402c2ad648e1985728da06f8729697a87f08b696180d06f74688320723b8854893964696573ec6f6f6b813888b1886989e16564870f870f870f870e56223e03e2809c436861728a596e672103e2809df389514183906e61d002c3a1766c6f7680627769746883e8878a71756987908fb1676c6191b187146c6988c9968070726980b87373844f844f844f844d4830844f844f92488a4a902182ef82ef9640737469636b844b81116e658bb0648110954c9a31776f726be173e995886ff48ce17489486679980b81b98e1f739be88af86661813063813092b974699868977c905d90108dd893029509833866726f6de780f084618188897269748807880788078806319353768398636f6d96309be2832984318ea397fb73696c83b884086169a0e88569736d6993308c627261748198667593b382029a1b87d0636f8731875975a1d0627574ea75877886b06e8f778f76a2e3686164eb657088a897e87463940882e0e5799ff86e8882796f7583c06d90b08179a438616c8e18889b2cee88788d0885daa6a2a003616c8d9d6f6f95f897e19921865876858868a8c99dd58a328349619ea86202c3a98f908338808068758d78726986db9a628ec96375652ed06965729b0885e184c889f06781337374a1008650884a9e40a1308ef5833f833b80406f89088091629e19971a9018706f779c21a475a55f9fa089b88d589e2885d392a7a0b98410a3e0899784c8a7c273a621706c652d6d696e883881386584a06781c16e970a898965788121ab4889b3ad826584f2a0b094792ea808860877a2c28af685216c9a1174a2c98b8583068b9c80a16e8631758250a57b8fe1ae1983e177687990f7a0676482c195236f76a37f97379737973748329f3297496d65616eb028726506e2808ae280a68b578b578b52b5014584d16f70976ba9bb6768b3589c9c706588208a182ca1998e2701a9b3fb617987da2e2083c849b77881b8ab336eab3191a173619e80666f72ab4a83bc97439fee6c696b65d2758169696103e2809462618110802a6963a2d994899cd9a9c993486286b0019480408da086689850bf2a6689ab90c790c3b107815995a086b89a788c52698872686192d9bb098528bb216fba9c81a26d90c29fb96e89e58a6f8a6f8a6a80588a6d961b69b4706f756ca2206176b49ba9706c64abf18e978e978e978e95338e92429960686f778e819701882167950a86c0844e3f8d019ce48cd96285986785e16e8d01837f837f837f837d34223e4182c46d8fb0a13994f794f763616d65f57087192cb9028a0373ada2a8a98b9b846b885073ba0987f14999f081608c008679af836f648fde6e84e09f206dab302ed481879e6c668f798691a350a0346368616ea3f98b69b2a17375a8e982486f6693209cc17369a7a9848980926390aaa73875be8094d19feaa6ea739458a347a34568952862b8c98ec8616c8bd968696d9db8aeb0a74d93b267a72b776fa388656e8b878b878b878b86358efa49e16dabc185798689a687934972b3f1955980e87993fd836087a263b45974758f98ba0d8db8b6996574a3386d6f80c886d970b74aa3f3815566658550bc19699cc081bf81b8a0a0a50b4998e1af5399fa87819e496692ea87a1ba12658b188a9891ba819c6e6f74f9951881da699170b232696e6b83f58f4ea2ba9ed2686589b789b789b789b636223e4e83006c65b0e1abc2a4b788b29834b592619c619627bc079eea89098e3c88518fad85706b65657081216df580f8649ef1831073a2d176a5c4bf30726f75a9608143a142b96bbd2892f881a063698180636c86d63c3f786d6cbc387293693d22312e30228fb8636f6486383d225554462d38223f3e0a3c6874813881586e8818687474703a2f2f7777772e77332e6f72672f313939392f78812122835881d1656e2d5553223e0a20203ca729804980587469746c653e49563c2f805380b36d657461e882a82d65717569763d22436f6eb618742d5479706522887980713d22746578742f83293b9b39727365743d75746685082f83426c8e7172656c3d227374796c65ad20657422f4822081dc63737322e87265663d226bbbf06c653a666c6f773a303030313f6d90a03d81368288827f827f827f827f827f827f32827f827e2f883d3c626f64a14092779270926438494c323089a88cb873a0c8b451646174612d70ab4088098040742d312d3122e98170882070b2c880892d3422f26f6c84d0646f632d80cc89d0832b657075622d85c92d8bb2706167838f4c3231838a093c2f839c80703c2f852185aa8b708ff134858f97fb36821732bf308e996834995f816266697273748470696c64817732b2104a7573951ab7b09e09746894da76b972959972a3d28618ad34647261b6809efa6f6f6d3ad07269b7499a29647202c3a979c26f6c6b02c3b3aae902c2ad6b699a5b6ca9218778f081727373ad98e8aec9629c982ec8b4b36197e9aab16ebcd0736fa0d0b68883506d619a78a0e0adf884a0756da070699ad02caab386d86d2c874065617263b86066659c11a7b0732ec57602c2ad6572009b888001840287").unwrap();
235
236 let expected_data = hex::decode("6172697320746f20766973c2ad6974204d616465c2ad6d6f69c2ad73656c6c652047656f7267653b20746861742061742068657220686f7573652068652063616d65207570c2ad6f6e20426f6e61c2ad70617274652c2077686f20616cc2ad736f20656ec2ad6a6f79656420746865206661c2ad6d6f7573206163c2ad7472657373e28099206661c2ad766f72732c20616e64207468617420696e206869732070726573c2ad656e6365204e61706f6c656f6e20686170c2ad70656e656420746f2066616c6c20696ec2ad746f206f6e65206f6620746865206661696e74c2ad696e67206669747320746f2077686963682068652077617320737562c2ad6a6563742c20616e642077617320746875732061742074686520647563e2809973206d6572c2ad63792e20546865206c6174c2ad746572207370617265642068696d2c20616e642074686973206d6167c2ad6e61c2ad6e696dc2ad69c2ad747920426f6e61c2ad706172746520737562c2ad7365c2ad7175656e74c2ad6c79207265c2ad706169642062792064656174682e3c2f703e0a0909093c7020636c6173733d2263616c69627265313722206169643d22374b344755223e5468652073746fc2ad72792077617320766572792070726574c2ad747920616e6420696ec2ad746572c2ad657374c2ad696e672c206573c2ad7065c2ad6369616cc2ad6c792061742074686520706f696e7420776865726520746865207269c2ad76616c7320737564c2ad64656ec2ad6c7920726563c2ad6f67c2ad6e697a6564206f6e6520616ec2ad6f7468c2ad65723b20616e6420746865206c6164696573206c6f6f6b6564206167c2ad69c2ad746174c2ad65642e3c2f703e0a0909093c7020636c6173733d2263616c69627265313722206169643d22374b344756223ee2809c436861726dc2ad696e6721e2809d207361696420416ec2ad6e612050c3a1766c6f76c2ad6e61207769746820616e20696ec2ad71756972c2ad696e6720676c616e636520617420746865206c6974c2ad746c65207072696e636573732e3c2f703e0a0909093c7020636c6173733d2263616c69627265313722206169643d22374b344830223ee2809c436861726dc2ad696e6721e2809d2077686973c2ad706572656420746865206c6974c2ad746c65207072696e636573732c20737469636bc2ad696e6720746865206e6565c2ad646c6520696ec2ad746f2068657220776f726b20617320696620746f20746573c2ad7469c2ad667920746861742074686520696ec2ad746572c2ad65737420616e6420666173c2ad6369c2ad6e61c2ad74696f6e206f66207468652073746fc2ad727920707265c2ad76656e74c2ad6564206865722066726f6d20676fc2ad696e67206f6e20776974682069742e3c2f703e0a0909093c7020636c6173733d2263616c69627265313722206169643d22374b344831223e546865207669c2ad636f6d7465206170c2ad707265c2ad6369c2ad6174c2ad656420746869732073696c656e742070726169736520616e6420736d696cc2ad696e67206772617465c2ad66756cc2ad6c7920707265c2ad706172656420746f20636f6ec2ad74696ec2ad75652c20627574206a757374207468656e20416ec2ad6e612050c3a1766c6f76c2ad6e612c2077686f20686164206b6570742061207761746368c2ad66756c20657965206f6e2074686520796f756e67206d616e2077686f20736f20616c61726d6564206865722c206e6fc2ad74696365642074686174206865207761732074616c6bc2ad696e6720746f6f206c6f7564c2ad6c7920616e64207665c2ad6865c2ad6d656e74c2ad6c79207769746820746865206162c2ad62c3a92c20736f2073686520687572c2ad7269656420746f2074686520726573c2ad6375652e2050696572726520686164206d616ec2ad6167656420746f207374617274206120636f6ec2ad766572c2ad7361c2ad74696f6e207769746820746865206162c2ad62c3a92061626f7574207468652062616cc2ad616e6365206f6620706f77c2ad65722c20616e6420746865206c6174c2ad7465722c206576c2ad69c2ad64656e74c2ad6c7920696ec2ad746572c2ad657374c2ad65642062792074686520796f756e67206d616ee28099732073696dc2ad706c652d6d696e64c2ad6564206561c2ad676572c2ad6e6573732c20776173206578c2ad706c61696ec2ad696e67206869732070657420746865c2ad6fc2ad72792e20426f746820776572652074616c6bc2ad696e6720616e64206c6973c2ad74656ec2ad696e6720746f6f206561c2ad676572c2ad6c7920616e6420746f6f206e6174c2ad75c2ad72616cc2ad6c792c207768696368207761732077687920416ec2ad6e612050c3a1766c6f76c2ad6e6120646973c2ad6170c2ad70726f7665642e3c2f703e0a0909093c7020636c6173733d2263616c69627265313722206169643d22374b344832223ee2809c546865206d65616e7320617265e2808ae280a6207468652062616cc2ad616e6365206f6620706f77c2ad657220696e204575c2ad726f706520616e642074686520726967687473206f66207468652070656fc2ad706c652ce2809d20746865206162c2ad62c3a92077617320736179c2ad696e672e20e2809c4974206973206f6ec2ad6c79206e6563c2ad6573c2ad7361727920666f72206f6e6520706f77c2ad6572c2ad66756c206e61c2ad74696f6e206c696b6520527573c2ad736961e28094626172c2ad626172c2ad696320617320736865206973207361696420746f206265e28094746f20706c61636520686572c2ad73656c6620646973c2ad696ec2ad746572c2ad657374c2ad6564c2ad6c79206174207468652068656164206f6620616e20616cc2ad6c69616e636520686176c2ad696e6720666f7220697473206f62c2ad6a65637420746865206d61696ec2ad7465c2ad6e616e6365206f66207468652062616cc2ad616e6365206f6620706f77c2ad6572206f66204575c2ad726f70652c20616e6420697420776f756c6420736176652074686520776f726c6421e2809d3c2f703e0a0909093c7020636c6173733d2263616c69627265313722206169643d22374b344833223ee2809c42757420686f772061726520796f7520746f2067657420746861742062616cc2ad616e63653fe2809d2050696572726520776173206265c2ad67696ec2ad6e696e672e3c2f703e0a0909093c7020636c6173733d2263616c69627265313722206169643d22374b344834223e41742074686174206d6fc2ad6d656e7420416ec2ad6e612050c3a1766c6f76c2ad6e612063616d6520757020616e642c206c6f6f6bc2ad696e67207365c2ad76657265c2ad6c79206174205069657272652c2061736b656420746865204974616cc2ad69616e20686f772068652073746f6f6420527573c2ad7369616e20636c69c2ad6d6174652e20546865204974616cc2ad69616ee2809973206661636520696ec2ad7374616e74c2ad6c79206368616e67656420616e64206173c2ad73756d656420616e206f66c2ad66656ec2ad73697665c2ad6c79206166c2ad66656374c2ad65642c20737567c2ad617279206578c2ad70726573c2ad73696f6e2c206576c2ad69c2ad64656e74c2ad6c79206861c2ad626974c2ad75c2ad616c20746f2068696d207768656e20636f6ec2ad76657273c2ad696e67207769746820776f6dc2ad656e2e3c2f703e0a0909093c7020636c6173733d2263616c69627265313722206169643d22374b344835223ee2809c4920616d20736f20656ec2ad6368616e74c2ad656420627920746865206272696cc2ad6c69616ec2ad6379206f66207468652077697420616e642063756cc2ad74757265206f662074686520736fc2ad6369c2ad6574792c206d6f7265206573c2ad7065c2ad6369616cc2ad6c79206f66207468652066656dc2ad69c2ad6e696e6520736fc2ad6369c2ad6574792c20696e20776869636820492068617665206861642074686520686f6ec2ad6f72206f66206265c2ad696e67207265c2ad6365697665642c207468617420492068617665206e6f7420796574206861642074696d6520746f207468696e6b206f662074686520636c69c2ad6d6174652ce2809d20736169642068652e3c2f703e0a0909093c7020636c6173733d2263616c69627265313722206169643d22374b344836223e4e6f74206c6574c2ad74696e6720746865206162c2ad62c3a920616e6420506965727265206573c2ad636170652c20416ec2ad6e612050c3a1766c6f76c2ad6e612c20746865206d6f726520636f6ec2ad7665c2ad6e69656e74c2ad6c7920746f206b656570207468656d20756ec2ad646572206f62c2ad736572c2ad7661c2ad74696f6e2c2062726f75676874207468656d20696ec2ad746f20746865206c617267c2ad657220636972c2ad636c652e3c2f703e0a09093c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d38223f3e0a3c68746d6c20786d6c6e733d22687474703a2f2f7777772e77332e6f72672f313939392f7868746d6c22206c616e673d22656e2d5553223e0a20203c686561643e0a202020203c7469746c653e49563c2f7469746c653e0a202020203c6d65746120687474702d65717569763d22436f6e74656e742d547970652220636f6e74656e743d22746578742f68746d6c3b20636861727365743d7574662d38222f3e0a20203c6c696e6b2072656c3d227374796c6573686565742220747970653d22746578742f6373732220687265663d226b696e646c653a666c6f773a303030313f6d696d653d746578742f637373222f3e0a3c6c696e6b2072656c3d227374796c6573686565742220747970653d22746578742f6373732220687265663d226b696e646c653a666c6f773a303030323f6d696d653d746578742f637373222f3e0a3c2f686561643e0a20203c626f647920636c6173733d2263616c6962726522206169643d2238494c3230223e0a09093c73656374696f6e20646174612d706172656e743d22706172742d312d31222069643d22636861707465722d312d312d342220726f6c653d22646f632d636861707465722220636c6173733d22657075622d747970652d7469746c657061676522206169643d2238494c3231223e0a0909093c2f73656374696f6e3e0a093c2f626f64793e0a3c2f68746d6c3e0a3c683420636c6173733d2263616c69627265313622206169643d2238494c3232223e49563c2f68343e0a0909093c7020636c6173733d2266697273742d6368696c6422206169643d2238494c3233223e4a757374207468656e20616ec2ad6f7468c2ad657220766973c2ad69c2ad746f7220656ec2ad7465726564207468652064726177c2ad696e6720726f6f6d3a205072696e636520416ec2ad6472c3a97920426f6c6bc3b36ec2ad73c2ad6b692c20746865206c6974c2ad746c65207072696e63657373e2809920687573c2ad62616e642e20486520776173206120766572792068616e64c2ad736f6d6520796f756e67206d616e2c206f66206d656469c2ad756d206865696768742c2077697468206669726d2c20636c65617263757420666561c2ad74757265732e204576c2ad657200617267000000006c65207072").unwrap();
237
238 let decompressed = decompress(&compressed).unwrap();
239
240 assert_eq!(decompressed, expected_data);
241 }
242}