use std::{
    fmt,
    io::{self, Seek, SeekFrom, Write},
    mem,
};

use bitflags::bitflags;
use bstr::{BStr, ByteSlice};

mod hash;

/// Magic bytes identifying a CQDB chunk.
const CHUNK_ID: &[u8; 4] = b"CQDB";
/// Constant written to the header to detect byte-order mismatches.
const BYTEORDER_CHECK: u32 = 0x62445371;
/// Number of hash tables, one per low byte of the hash value.
const NUM_TABLES: usize = 256;

bitflags! {
    /// Database flags stored in the chunk header.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Flag: u32 {
        /// No flags set.
        const NONE = 0;
        /// Forward-only database: no backward (id to string) links are
        /// written, so `CQDB::to_str` will return `None`.
        const ONEWAY = 0x00000001;
    }
}

/// Reads a little-endian `u32` from `buf` at `offset`.
#[inline(always)]
fn read_u32_le(buf: &[u8], offset: usize) -> u32 {
    let b = &buf[offset..offset + 4];
    u32::from_le_bytes([b[0], b[1], b[2], b[3]])
}

/// Packs a `u32` into little-endian bytes for writing.
#[inline(always)]
fn pack_u32(value: u32) -> [u8; 4] {
    value.to_le_bytes()
}

/// Reference to one on-disk hash table.
#[derive(Debug, Clone, Copy, Default)]
struct ReadTable {
    /// Byte offset of the table's bucket array (0 if the table is empty).
    offset: usize,
    /// Number of buckets in the table.
    num: u32,
}

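/// Read-only view of a constant-quark database (CQDB) over a borrowed
/// byte buffer.
///
/// A minimal round-trip sketch; the crate name `cqdb` and the file
/// `db.cqdb` are assumptions here, so the example is marked `no_run`:
///
/// ```no_run
/// let buf = std::fs::read("db.cqdb").unwrap();
/// let db = cqdb::CQDB::new(&buf).unwrap();
/// if let Some(id) = db.to_id("foo") {
///     assert_eq!(db.to_str(id).map(|s| s.to_vec()), Some(b"foo".to_vec()));
/// }
/// ```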
#[derive(Clone)]
pub struct CQDB<'a> {
    /// Raw database image.
    buffer: &'a [u8],
    /// Parsed chunk header.
    header: Header,
    /// One table reference per low byte of the hash value.
    tables: [ReadTable; NUM_TABLES],
    /// Byte offset of the backward-link array (0 if absent).
    bwd_offset: usize,
    /// Number of string/id associations.
    num: u32,
}

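// On-disk layout of a chunk (all integers little-endian):
//
//   bytes 0..24     header (see `Header`)
//   bytes 24..2072  256 table references: (offset, num) pairs of u32
//   records         id/key records appended by `CQDBWriter::put`
//   tables          open-addressed bucket arrays of (hash, offset) pairs
//   backward links  one u32 record offset per id (omitted for ONEWAY)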
/// Fixed 24-byte chunk header.
#[derive(Debug, Clone)]
#[repr(C)]
struct Header {
    /// Magic bytes, always `b"CQDB"`.
    chunk_id: [u8; 4],
    /// Total size of the chunk in bytes.
    size: u32,
    /// Database flags (see [`Flag`]).
    flag: u32,
    /// Byte-order check value, always [`BYTEORDER_CHECK`].
    byteorder: u32,
    /// Number of backward-link entries.
    bwd_size: u32,
    /// Offset of the backward-link array, relative to the chunk start.
    bwd_offset: u32,
}

/// In-memory hash table built up while writing.
#[derive(Debug, Clone, Default)]
struct Table {
    /// Allocated capacity of `bucket`.
    size: usize,
    /// Number of buckets in use.
    num: u32,
    bucket: Vec<Bucket>,
}

/// On-disk table reference: bucket-array offset and bucket count.
#[repr(C)]
struct TableRef {
    offset: u32,
    num: u32,
}

/// On-disk bucket: 8 bytes of (hash, offset).
#[derive(Debug, Default, Clone, Copy)]
#[repr(C)]
struct Bucket {
    /// Hash value of the key.
    hash: u32,
    /// Record offset; 0 marks an empty bucket.
    offset: u32,
}

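/// Builder that writes a CQDB chunk to any `Write + Seek` destination.
///
/// The hash tables, backward links, and header are written when the writer
/// is dropped. A minimal sketch, assuming this crate is published as `cqdb`:
///
/// ```no_run
/// use std::io::Cursor;
///
/// let mut buf = Cursor::new(Vec::new());
/// {
///     let mut writer = cqdb::CQDBWriter::new(&mut buf).unwrap();
///     writer.put("foo", 0).unwrap();
///     writer.put("bar", 1).unwrap();
/// } // dropping the writer finalizes the chunk
/// assert_eq!(&buf.into_inner()[0..4], b"CQDB");
/// ```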
pub struct CQDBWriter<T: Write + Seek> {
    writer: T,
    /// Flags to record in the header.
    flag: Flag,
    /// Stream position at which the chunk starts.
    begin: u32,
    /// Current write offset, relative to `begin`.
    current: u32,
    /// In-memory hash tables, keyed by the low byte of the hash.
    tables: [Table; NUM_TABLES],
    /// Backward links: record offset for each id.
    bwd: Vec<u32>,
    /// Number of backward links in use.
    bwd_num: u32,
    /// Allocated capacity of `bwd`.
    bwd_size: u32,
}

impl<'a> fmt::Debug for CQDB<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CQDB")
            .field("header", &self.header)
            .field("bwd_offset", &self.bwd_offset)
            .field("num", &self.num)
            .finish()
    }
}

impl<T: Write + Seek + fmt::Debug> fmt::Debug for CQDBWriter<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CQDBWriter")
            .field("writer", &self.writer)
            .field("flag", &self.flag)
            .field("begin", &self.begin)
            .field("current", &self.current)
            .field("bwd", &self.bwd)
            .field("bwd_num", &self.bwd_num)
            .field("bwd_size", &self.bwd_size)
            .finish()
    }
}

impl<'a> CQDB<'a> {
    /// Parses a CQDB chunk from `buf`, validating the header and the bounds
    /// of every table and the backward-link array.
    pub fn new(buf: &'a [u8]) -> io::Result<Self> {
        let min_size = mem::size_of::<Header>() + mem::size_of::<TableRef>() * NUM_TABLES;
        if buf.len() < min_size {
            return Err(io::Error::other("invalid file format"));
        }
        if &buf[0..4] != CHUNK_ID {
            return Err(io::Error::other("invalid file format, magic mismatch"));
        }
        let chunk_size = read_u32_le(buf, 4);
        let flag = read_u32_le(buf, 8);
        let byte_order = read_u32_le(buf, 12);
        if byte_order != BYTEORDER_CHECK {
            return Err(io::Error::other("invalid file format, byte order mismatch"));
        }
        let bwd_size = read_u32_le(buf, 16);
        let bwd_offset_raw = read_u32_le(buf, 20);
        let header = Header {
            chunk_id: *CHUNK_ID,
            size: chunk_size,
            flag,
            byteorder: byte_order,
            bwd_size,
            bwd_offset: bwd_offset_raw,
        };

        // Read the 256 table references that follow the 24-byte header.
        let mut num_db = 0u32;
        let mut tables = [ReadTable::default(); NUM_TABLES];
        let mut index = 24;
        for table in &mut tables {
            let table_offset = read_u32_le(buf, index) as usize;
            index += 4;
            let table_num = read_u32_le(buf, index);
            index += 4;
            if table_offset > 0 {
                // Each bucket is 8 bytes; reject tables extending past the buffer.
                let end = (table_num as usize)
                    .checked_mul(8)
                    .and_then(|bytes| table_offset.checked_add(bytes));
                match end {
                    Some(end) if end <= buf.len() => {
                        table.offset = table_offset;
                        table.num = table_num;
                    }
                    _ => return Err(io::Error::other("invalid table data: out of bounds")),
                }
            }
            // Bucket arrays are written at twice the entry count.
            num_db += table_num / 2;
        }

        let bwd_offset = if bwd_offset_raw > 0 {
            let off = bwd_offset_raw as usize;
            // Each backward link is a 4-byte record offset.
            let end = (bwd_size as usize)
                .checked_mul(4)
                .and_then(|bytes| off.checked_add(bytes));
            match end {
                Some(end) if end <= buf.len() => off,
                _ => {
                    return Err(io::Error::other(
                        "invalid backward link data: out of bounds",
                    ));
                }
            }
        } else {
            0
        };

        Ok(Self {
            buffer: buf,
            header,
            tables,
            bwd_offset,
            num: num_db,
        })
    }

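    /// Returns the number of string/id associations in the database.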
    #[inline]
    pub fn num(&self) -> u32 {
        self.num
    }

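    /// Looks up the id assigned to the string `s`, or `None` if it is absent.
    ///
    /// The low byte of the hash selects one of the 256 tables; the remaining
    /// bits choose a start bucket, and collisions are resolved by linear
    /// probing until an empty bucket is reached.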
    #[inline]
    pub fn to_id(&self, s: &str) -> Option<u32> {
        // The hash covers the key plus its terminating NUL byte.
        let hash = crate::hash::jhash(s.as_bytes(), s.len() as u32 + 1, 0);
        let table = &self.tables[(hash % NUM_TABLES as u32) as usize];
        if table.num > 0 {
            let n = table.num;
            let base = table.offset;
            let mut k = (hash >> 8) % n;
            loop {
                // Each bucket is (hash: u32, offset: u32), little-endian.
                let bk = &self.buffer[base + (k as usize) * 8..][..8];
                let bucket_offset = u32::from_le_bytes([bk[4], bk[5], bk[6], bk[7]]);
                if bucket_offset > 0 {
                    let bucket_hash = u32::from_le_bytes([bk[0], bk[1], bk[2], bk[3]]);
                    if bucket_hash == hash {
                        // Record layout: id (4 bytes), key size (4 bytes), key + NUL.
                        let rec_start = bucket_offset as usize;
                        let rec = self.buffer.get(rec_start..rec_start + 8)?;
                        let value = u32::from_le_bytes([rec[0], rec[1], rec[2], rec[3]]);
                        let ksize = (u32::from_le_bytes([rec[4], rec[5], rec[6], rec[7]]) as usize)
                            .checked_sub(1)?;
                        let key_end = rec_start.checked_add(8 + ksize)?;
                        // Compare the full key to rule out hash collisions.
                        if s.as_bytes() == self.buffer.get(rec_start + 8..key_end)? {
                            return Some(value);
                        }
                    }
                } else {
                    // An empty bucket terminates the probe sequence.
                    break;
                }
                k = (k + 1) % n;
            }
        }
        None
    }

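    /// Looks up the string assigned to `id` via the backward-link array.
    ///
    /// Returns `None` if `id` is out of range, unassigned, or the database
    /// was written with [`Flag::ONEWAY`].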
    #[inline]
    pub fn to_str(&'a self, id: u32) -> Option<&'a BStr> {
        if self.bwd_offset > 0 && id < self.header.bwd_size {
            let offset = read_u32_le(self.buffer, self.bwd_offset + (id as usize) * 4);
            if offset > 0 {
                // Skip the 4-byte id field and read the key size.
                let index = offset as usize + 4;
                let rec = self.buffer.get(index..index + 4)?;
                // The stored size includes the terminating NUL, dropped here.
                let value_size = (u32::from_le_bytes([rec[0], rec[1], rec[2], rec[3]]) as usize)
                    .checked_sub(1)?;
                let start = index + 4;
                let end = start.checked_add(value_size)?;
                return Some(self.buffer.get(start..end)?.as_bstr());
            }
        }
        None
    }

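    /// Returns an iterator over `(id, string)` pairs in ascending id order.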
    pub fn iter(&'a self) -> Iter<'a> {
        Iter { db: self, next: 0 }
    }
}

/// Iterator over the `(id, string)` pairs of a [`CQDB`].
pub struct Iter<'a> {
    db: &'a CQDB<'a>,
    next: u32,
}

impl<'a> Iterator for Iter<'a> {
    type Item = io::Result<(u32, &'a BStr)>;

    fn next(&mut self) -> Option<Self::Item> {
        let id = self.next;
        // Iteration stops at the first id without a backward link.
        if let Some(s) = self.db.to_str(id) {
            self.next += 1;
            return Some(Ok((id, s)));
        }
        None
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = if self.db.bwd_offset > 0 {
            self.db.header.bwd_size.saturating_sub(self.next) as usize
        } else {
            0
        };
        (0, Some(remaining))
    }
}

impl<'a> IntoIterator for &'a CQDB<'a> {
    type Item = io::Result<(u32, &'a BStr)>;
    type IntoIter = Iter<'a>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}

impl<T: Write + Seek> CQDBWriter<T> {
    /// Creates a new writer with no flags set.
    pub fn new(writer: T) -> io::Result<Self> {
        Self::with_flag(writer, Flag::NONE)
    }

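    /// Creates a new writer with the given flags.
    ///
    /// Space for the header and the 256 table references is reserved up
    /// front and back-filled by `close`.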
    pub fn with_flag(mut writer: T, flag: Flag) -> io::Result<Self> {
        let begin = writer.stream_position()? as u32;
        let current = (mem::size_of::<Header>() + mem::size_of::<TableRef>() * NUM_TABLES) as u32;
        // Skip the header and table-reference area; it is written in `close`.
        writer.seek(SeekFrom::Start((begin + current) as u64))?;
        Ok(Self {
            writer,
            flag,
            begin,
            current,
            tables: std::array::from_fn(|_| Table::default()),
            bwd: Vec::new(),
            bwd_num: 0,
            bwd_size: 0,
        })
    }

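    /// Writes a `(key, id)` record and registers it in the in-memory hash
    /// table and, unless [`Flag::ONEWAY`] is set, the backward-link array.
    ///
    /// The record layout is: id (u32), key size including the terminating
    /// NUL (u32), then the NUL-terminated key bytes.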
    pub fn put<K: AsRef<[u8]>>(&mut self, key: K, id: u32) -> io::Result<()> {
        let key = key.as_ref();
        // The stored key size includes the terminating NUL byte.
        let key_size = key.len() as u32 + 1;
        let hash = crate::hash::jhash(key, key_size, 0);
        let table = &mut self.tables[(hash % NUM_TABLES as u32) as usize];
        let record_len = 8 + key.len() + 1;
        if record_len <= 264 {
            // Fast path: assemble small records (keys up to 255 bytes) in a
            // stack buffer and issue a single write.
            let mut buf = [0u8; 264];
            buf[0..4].copy_from_slice(&pack_u32(id));
            buf[4..8].copy_from_slice(&pack_u32(key_size));
            buf[8..8 + key.len()].copy_from_slice(key);
            self.writer.write_all(&buf[..record_len])?;
        } else {
            self.writer.write_all(&pack_u32(id))?;
            self.writer.write_all(&pack_u32(key_size))?;
            self.writer.write_all(key)?;
            self.writer.write_all(b"\0")?;
        }
        // Grow the bucket array geometrically as entries are added.
        if table.size <= table.num as usize {
            table.size = (table.size + 1) * 2;
            table.bucket.resize(table.size, Bucket::default());
        }
        table.bucket[table.num as usize].hash = hash;
        table.bucket[table.num as usize].offset = self.current;
        table.num += 1;
        if !self.flag.contains(Flag::ONEWAY) {
            if self.bwd_size <= id {
                let mut size = self.bwd_size;
                while size <= id {
                    size = (size + 1) * 2;
                }
                self.bwd.resize(size as usize, 0);
                self.bwd_size = size;
            }
            if self.bwd_num <= id {
                self.bwd_num = id + 1;
            }
            self.bwd[id as usize] = self.current;
        }
        self.current += 4 + 4 + key_size;
        Ok(())
    }

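    /// Finalizes the chunk: writes each table's open-addressed bucket array
    /// (sized at twice its entry count), then the backward-link array, and
    /// finally seeks back to fill in the header and table references.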
    fn close(&mut self) -> io::Result<()> {
        let mut header = Header {
            chunk_id: *CHUNK_ID,
            flag: self.flag.bits(),
            byteorder: BYTEORDER_CHECK,
            bwd_offset: 0,
            bwd_size: self.bwd_num,
            size: 0,
        };
        let mut dst: Vec<Bucket> = Vec::new();
        #[cfg(not(target_endian = "little"))]
        let mut write_buf: Vec<u8> = Vec::new();
        for i in 0..NUM_TABLES {
            let table = &self.tables[i];
            if table.bucket.is_empty() {
                continue;
            }
            // Build an open-addressed bucket array at twice the entry count,
            // resolving collisions by linear probing.
            let n = table.num * 2;
            let n_usize = n as usize;
            dst.clear();
            dst.resize(n_usize, Bucket::default());
            for j in 0..table.num as usize {
                let src = &table.bucket[j];
                let mut k = (src.hash >> 8) % n;
                while dst[k as usize].offset != 0 {
                    k = (k + 1) % n;
                }
                dst[k as usize].hash = src.hash;
                dst[k as usize].offset = src.offset;
            }
            #[cfg(target_endian = "little")]
            {
                // On little-endian targets `Bucket` already has the on-disk
                // layout, so the array can be written as raw bytes.
                let bytes =
                    unsafe { std::slice::from_raw_parts(dst.as_ptr() as *const u8, n_usize * 8) };
                self.writer.write_all(bytes)?;
            }
            #[cfg(not(target_endian = "little"))]
            {
                write_buf.clear();
                write_buf.reserve(n_usize * 8);
                for bucket in &dst[..n_usize] {
                    write_buf.extend_from_slice(&pack_u32(bucket.hash));
                    write_buf.extend_from_slice(&pack_u32(bucket.offset));
                }
                self.writer.write_all(&write_buf)?;
            }
        }
        if !self.flag.contains(Flag::ONEWAY) && self.bwd_size > 0 {
            let current_offset = self.writer.stream_position()? as u32;
            header.bwd_offset = current_offset - self.begin;
            #[cfg(target_endian = "little")]
            {
                let bytes = unsafe {
                    std::slice::from_raw_parts(
                        self.bwd.as_ptr() as *const u8,
                        self.bwd_num as usize * 4,
                    )
                };
                self.writer.write_all(bytes)?;
            }
            #[cfg(not(target_endian = "little"))]
            {
                write_buf.clear();
                write_buf.reserve(self.bwd_num as usize * 4);
                for i in 0..self.bwd_num as usize {
                    write_buf.extend_from_slice(&pack_u32(self.bwd[i]));
                }
                self.writer.write_all(&write_buf)?;
            }
        }
        let offset = self.writer.stream_position()? as u32;
        header.size = offset - self.begin;
        // Seek back and fill in the header and the 256 table references.
        self.writer.seek(SeekFrom::Start(self.begin as u64))?;
        let mut hdr_buf = [0u8; 24 + NUM_TABLES * 8];
        hdr_buf[0..4].copy_from_slice(&header.chunk_id);
        hdr_buf[4..8].copy_from_slice(&pack_u32(header.size));
        hdr_buf[8..12].copy_from_slice(&pack_u32(header.flag));
        hdr_buf[12..16].copy_from_slice(&pack_u32(header.byteorder));
        hdr_buf[16..20].copy_from_slice(&pack_u32(header.bwd_size));
        hdr_buf[20..24].copy_from_slice(&pack_u32(header.bwd_offset));
        for i in 0..NUM_TABLES {
            let table_num = self.tables[i].num;
            // Tables were written sequentially after the records, so
            // `self.current` tracks each table's offset in turn.
            let table_offset = if table_num > 0 { self.current } else { 0 };
            let off = 24 + i * 8;
            hdr_buf[off..off + 4].copy_from_slice(&pack_u32(table_offset));
            hdr_buf[off + 4..off + 8].copy_from_slice(&pack_u32(table_num * 2));
            self.current += table_num * 2 * std::mem::size_of::<Bucket>() as u32;
        }
        self.writer.write_all(&hdr_buf)?;
        self.writer.seek(SeekFrom::Start(offset as u64))?;
        Ok(())
    }
}

impl<T: Write + Seek> Drop for CQDBWriter<T> {
    fn drop(&mut self) {
        // Errors during finalization cannot be surfaced from `drop`.
        let _ = self.close();
    }
}