1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
/*! This crate implements various [CqlType](https://docs.rs/cql_model/0.2/cql_model/trait.CqlType.html) derivatives for storing String values of up to (and including) 255 chars in a [CQL database](https://docs.rs/cql_db/0.2/cql_db/). Will allocate 1020 bytes per value [linked](https://docs.rs/cql_db/0.2/cql_db/fn.link_dimensions.html). # Benchmarks Benchmarks supplied below are fairly rudimentary (and rounded) and are there to give a rough idea of relative costs. Full benchmark code can be found in [github](https://github.com/AndrewSisley/CQLDb/tree/master/cql_storage_types/cql_tiny_text) and can be run with `rustup run nightly cargo bench`, but please be aware that they will allocate ~102MB of disk space. The read_to_stream benchmarks also differ slightly from other [CqlType](https://docs.rs/cql_model/0.2/cql_model/trait.CqlType.html) derivatives as they stream into a Vector, not an Array. Operation | Database dimensions | Mean time (ns) --- | --- | --- Single point read | 1 | 3 060 (+/- 200) Single point read | 4 | 15 800 (+/- 1 100) Single point write | 1 | 2 800 (+/- 300) Single point write | 4 | 15 400 (+/- 1 000) Stream read 1 point | 1 | 3 500 (+/- 300) Stream read 1 point | 4 | 15 500 (+/- 1 100) Stream read 50 000 points | 1 | 56 700 000 (+/- 800 000) Stream read 50 000 points | 4 | 56 400 000 (+/- 150 000) # Examples The following creates a 1D database, writes 2 values to it, and then streams them into an array. ``` # use std::convert::TryFrom; # use std::io::{ Cursor, SeekFrom, Seek }; # use cql_tiny_text::{ TinyText, unpack_stream }; # # use std::error::Error; # use std::fs::remove_file; # fn main() -> Result<(), Box<dyn Error>> { # let _ = remove_file(format!("{}{}", DATABASE_LOCATION, "/db")); # let _ = remove_file(format!("{}{}", DATABASE_LOCATION, "/ax")); # # const DATABASE_LOCATION: &str = "./.test_db"; const N_VALUES_TO_READ: usize = 3; let base_point = [1]; let value1 = "item one"; let value3 = "شماره ۳"; cql_db::create_db::<TinyText>( DATABASE_LOCATION, &[3] ); cql_db::write_value::<TinyText>( DATABASE_LOCATION, &base_point, TinyText::try_from(value1)? )?; cql_db::write_value::<TinyText>( DATABASE_LOCATION, &[base_point[0] + 2], TinyText::try_from(value3)? )?; let mut result = Vec::with_capacity(N_VALUES_TO_READ); let mut stream = Cursor::new(Vec::new()); cql_db::read_to_stream::<TinyText>( DATABASE_LOCATION, &mut stream, &base_point, N_VALUES_TO_READ as u64 )?; stream.seek(SeekFrom::Start(0)); unpack_stream(&mut stream, N_VALUES_TO_READ, |_, value| { result.push(value) })?; assert_eq!(result[0], TinyText::try_from(value1)?); assert_eq!(result[1], TinyText::new()); assert_eq!(result[2], TinyText::try_from(value3)?); # Ok(()) # } ``` */ #![doc(html_root_url = "https://docs.rs/cql_tiny_text/0.2.1")] pub mod errors; pub mod interop; use std::fs::{ File, OpenOptions }; use std::io; use std::io::{ Read, Write, Cursor, SeekFrom, Seek }; use byteorder::{ ReadBytesExt, WriteBytesExt, LittleEndian }; use cql_model::{ CqlType, CqlWritable, CqlReadable, CqlStreamReadable }; const CONTENT_SIZE: usize = (255 * 4); const LENGTH_SIZE: usize = 2; /// Tuple wrapping `String` for working with `TinyText` values in a [CQL database](https://docs.rs/cql_db/). /// /// Limited in size to `255 * 4 = 1020` bytes. #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)] pub struct TinyText(String); impl CqlType for TinyText { type ValueType = Self; const VALUE_SIZE: usize = CONTENT_SIZE + LENGTH_SIZE; } impl TinyText { pub fn new() -> Self { TinyText(String::new()) } } impl CqlWritable for TinyText { fn write_to_db(db_location: &str, value_location: u64, input_value: Self::ValueType) -> io::Result<()> { let mut file = OpenOptions::new().write(true).open(db_location)?; // unwrap should be considered safe by this point, with earlier checks in the cql_db crate (if not deliberately unchecked) file.seek(SeekFrom::Start(value_location * Self::VALUE_SIZE as u64)).unwrap(); let input_length: u16 = input_value.0.len() as u16; let mut buffer = vec![]; buffer.write_u16::<LittleEndian>(input_length)?; buffer.extend(&input_value.0.into_bytes()); file.write_all(&buffer) } } impl CqlReadable for TinyText { fn read_from_db(db_location: &str, value_location: u64) -> io::Result<Self::ValueType> { let mut file = File::open(&db_location)?; // unwrap should be considered safe by this point, with earlier checks in the cql_db crate (if not deliberately unchecked) file.seek(SeekFrom::Start(value_location * Self::VALUE_SIZE as u64)).unwrap(); let mut size_buffer = [0; LENGTH_SIZE]; file.read_exact(&mut size_buffer)?; let mut size_rdr = Cursor::new(size_buffer); let size = usize::from(size_rdr.read_u16::<LittleEndian>()?); if size == 0 { return Ok(TinyText::new()) } let mut value_buffer = Vec::with_capacity(size); file.take(size as u64).read_to_end(&mut value_buffer)?; Ok( TinyText( // unwrap should be safe here, as we assume we are the only ones writing to the file, however low performance cost plus the fact that someone else `could` // write to the file discourages the use of the unsafe method that skips the checks String::from_utf8(value_buffer).unwrap() ) ) } } impl CqlStreamReadable for TinyText { fn read_to_stream(db_location: &str, stream: &mut dyn Write, value_location: u64, n_values: u64) -> io::Result<()> { let mut file = File::open(&db_location)?; // unwrap should be considered safe by this point, with earlier checks in the cql_db crate (if not deliberately unchecked) file.seek(SeekFrom::Start(value_location * Self::VALUE_SIZE as u64)).unwrap(); let mut value_buffer = [0; CONTENT_SIZE]; for _i in 0..n_values { // must have value cleared for each value read or previous value will be quietly retained and re-written to the (out) stream let mut size_buffer = [0; LENGTH_SIZE]; match file.read_exact(&mut size_buffer) { Err(e) => { // ignore io::ErrorKind::UnexpectedEof and continue if e.kind() != io::ErrorKind::UnexpectedEof { return Err(e) } } _ => { } } let mut size_rdr = Cursor::new(size_buffer); let size = usize::from(size_rdr.read_u16::<LittleEndian>()?); match file.read_exact(&mut value_buffer) { Err(e) => { // ignore io::ErrorKind::UnexpectedEof and continue if e.kind() != io::ErrorKind::UnexpectedEof { return Err(e) } } _ => { } } let mut write_buffer = Vec::with_capacity(LENGTH_SIZE + size); write_buffer.extend(&size_buffer); for i in 0..size { write_buffer.push(value_buffer[i]); } stream.write_all(&mut write_buffer)?; } stream.flush() } } /// Unpacks `n_values` of `TinyText` from a stream, calling `value_handler` with each value and it's index. /// /// # Errors /// /// Will return any [I/O errors](https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html) encountered during the execution of the function. If an error /// is returned, it may be that values have already been fed into the `value_handler`. /// /// # Panics /// /// Function does not actively defend against panics, and may do so if given invalid parameters. If the function panics it may be that values have /// already been fed into the `value_handler`. /// /// # Examples /// ```ignore /// cql_db::read_to_stream::<TinyText>( /// DATABASE_LOCATION, /// &mut stream, /// &base_point, /// N_VALUES_TO_READ as u64 /// )?; /// /// stream.seek(SeekFrom::Start(0)); /// /// unpack_stream(&mut stream, N_VALUES_TO_READ, |idx, value| { /// result[idx] = value /// })?; /// ``` pub fn unpack_stream<F>(stream: &mut Cursor<Vec<u8>>, n_values: usize, mut value_handler: F) -> io::Result<()> where F: FnMut(usize, TinyText) { let mut size_buffer = [0; LENGTH_SIZE]; for index in 0..n_values { stream.read_exact(&mut size_buffer)?; let mut size_rdr = Cursor::new(size_buffer); let size = usize::from(size_rdr.read_u16::<LittleEndian>()?); if size == 0 { value_handler(index, TinyText::new()); } else { let mut value_buffer = vec![0; size]; stream.read_exact(&mut value_buffer)?; // unwrap should be safe here, as we assume we are the only ones writing to the file, however low performance cost plus the fact that someone else `could` // write to the file discourages the use of the unsafe method that skips the checks value_handler(index, TinyText(String::from_utf8(value_buffer).unwrap())); } } Ok(()) }