1use std::borrow::Cow;
2
3use enum_dispatch::enum_dispatch;
4
5use crate::EncDec;
6use crate::analyse::{Analyze, StatType};
7use crate::v01::{EncodedStream, FsstStrEncoder, IntEncoder, RawStream, StreamMeta};
8
/// Owned name carried by the `Encoded*` structs in this file.
///
/// A newtype over `String`; the inner value is public.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EncodedName(pub String);
12
/// [`EncDec`] specialised to [`RawProperty`] and [`ParsedProperty`].
// NOTE(review): presumably "a property that is either still raw or already parsed";
// confirm against the definition of `EncDec`.
pub type Property<'a> = EncDec<RawProperty<'a>, ParsedProperty<'a>>;
15
/// Coarse classification of a property.
///
/// The property enums in this file ([`RawProperty`], [`ParsedProperty`], ...) have one
/// variant per concrete value type; this enum only has one variant per family.
/// Presumably all sized integer variants map to [`PropertyKind::Integer`] and both float
/// widths to [`PropertyKind::Float`] (confirm against the conversion code).
///
/// All variants are fieldless, so the common traits are derived to make the type usable
/// in comparisons, debug output and by-value passing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PropertyKind {
    /// Boolean-valued property.
    Bool,
    /// Integer-valued property.
    Integer,
    /// Floating-point-valued property.
    Float,
    /// String-valued property.
    String,
    /// Shared-dictionary property.
    SharedDict,
}
23
/// Scalar property column in its "raw" form: a borrowed name plus raw streams.
///
/// Used as the payload of every non-string, non-shared-dict variant of [`RawProperty`].
/// Presumably "raw" means the streams have not been decoded yet (confirm against `RawStream`).
#[derive(Debug, Clone, PartialEq)]
pub struct RawScalar<'a> {
    /// Property name, borrowed for `'a`.
    pub name: &'a str,
    /// Optional presence stream (see [`RawPresence`]).
    pub presence: RawPresence<'a>,
    /// Stream holding the values.
    pub data: RawStream<'a>,
}
31
/// Owned counterpart of [`RawScalar`]: same three fields, using the `Encoded*` types.
///
/// Used as the payload of every non-string, non-shared-dict variant of [`EncodedProperty`].
#[derive(Debug, Clone, PartialEq)]
pub struct EncodedScalar {
    /// Property name.
    pub name: EncodedName,
    /// Optional presence stream (see [`EncodedPresence`]).
    pub presence: EncodedPresence,
    /// Stream holding the values.
    pub data: EncodedStream,
}
39
/// Stream layout of a raw string column.
///
/// Four layouts exist: plain or FSST string data, each either used directly or combined
/// with an additional `offsets` stream in the `*Dictionary` variants.
/// Presumably the offsets index into the dictionary entries (confirm against the decoder).
#[derive(Debug, Clone, PartialEq)]
pub enum RawStringsEncoding<'a> {
    /// Plain string data only.
    Plain(RawPlainData<'a>),
    /// Plain string data plus an `offsets` stream.
    Dictionary {
        /// The plain string data.
        plain_data: RawPlainData<'a>,
        /// The accompanying offsets stream.
        offsets: RawStream<'a>,
    },
    /// FSST string data only.
    FsstPlain(RawFsstData<'a>),
    /// FSST string data plus an `offsets` stream.
    FsstDictionary {
        /// The FSST string data.
        fsst_data: RawFsstData<'a>,
        /// The accompanying offsets stream.
        offsets: RawStream<'a>,
    },
}
60
/// Owned counterpart of [`RawStringsEncoding`], with the same four variants built from
/// the `Encoded*` types.
#[derive(Debug, Clone, PartialEq)]
pub enum EncodedStringsEncoding {
    /// Plain string data only.
    Plain(EncodedPlainData),
    /// Plain string data plus an `offsets` stream.
    Dictionary {
        /// The plain string data.
        plain_data: EncodedPlainData,
        /// The accompanying offsets stream.
        offsets: EncodedStream,
    },
    /// FSST string data only.
    FsstPlain(EncodedFsstData),
    /// FSST string data plus an `offsets` stream.
    FsstDictionary {
        /// The FSST string data.
        fsst_data: EncodedFsstData,
        /// The accompanying offsets stream.
        offsets: EncodedStream,
    },
}
75
/// String property column in raw form; payload of [`RawProperty::Str`].
#[derive(Debug, Clone, PartialEq)]
pub struct RawStrings<'a> {
    /// Property name, borrowed for `'a`.
    pub name: &'a str,
    /// Optional presence stream (see [`RawPresence`]).
    pub presence: RawPresence<'a>,
    /// Streams holding the string data, in one of the supported layouts.
    pub encoding: RawStringsEncoding<'a>,
}
83
/// Owned counterpart of [`RawStrings`]; payload of [`EncodedProperty::Str`].
#[derive(Debug, Clone, PartialEq)]
pub struct EncodedStrings {
    /// Property name.
    pub name: EncodedName,
    /// Optional presence stream (see [`EncodedPresence`]).
    pub presence: EncodedPresence,
    /// Streams holding the string data, in one of the supported layouts.
    pub encoding: EncodedStringsEncoding,
}
91
/// Stream layout of the dictionary data of a raw shared-dictionary property.
///
/// Unlike [`RawStringsEncoding`] there are no `*Dictionary` variants here; only the plain
/// and FSST layouts are available.
#[derive(Debug, Clone, PartialEq)]
pub enum RawSharedDictEncoding<'a> {
    /// Plain string data.
    Plain(RawPlainData<'a>),
    /// FSST string data.
    FsstPlain(RawFsstData<'a>),
}
104
/// Owned counterpart of [`RawSharedDictEncoding`], with the same two variants.
#[derive(Debug, Clone, PartialEq)]
pub enum EncodedSharedDictEncoding {
    /// Plain string data.
    Plain(EncodedPlainData),
    /// FSST string data.
    FsstPlain(EncodedFsstData),
}
111
/// Shared-dictionary property in raw form; payload of [`RawProperty::SharedDict`].
///
/// Holds one set of dictionary streams plus a list of child items.
/// Presumably every child refers into the same dictionary (confirm against the decoder).
#[derive(Debug, Clone, PartialEq)]
pub struct RawSharedDict<'a> {
    /// Property name, borrowed for `'a`.
    pub name: &'a str,
    /// Streams holding the dictionary data.
    pub encoding: RawSharedDictEncoding<'a>,
    /// Child items of this property.
    pub children: Vec<RawSharedDictItem<'a>>,
}
119
/// Owned counterpart of [`RawSharedDict`]; payload of [`EncodedProperty::SharedDict`].
#[derive(Debug, Clone, PartialEq)]
pub struct EncodedSharedDict {
    /// Property name.
    pub name: EncodedName,
    /// Streams holding the dictionary data.
    pub encoding: EncodedSharedDictEncoding,
    /// Child items of this property.
    pub children: Vec<EncodedSharedDictItem>,
}
127
/// A property column in raw form, tagged with its value type.
///
/// All scalar variants share the [`RawScalar`] payload, so the variant itself is the only
/// record of the value type. Strings and shared dictionaries have dedicated payloads.
#[derive(Debug, PartialEq, Clone)]
pub enum RawProperty<'a> {
    /// Boolean column.
    Bool(RawScalar<'a>),
    /// `i8` column.
    I8(RawScalar<'a>),
    /// `u8` column.
    U8(RawScalar<'a>),
    /// `i32` column.
    I32(RawScalar<'a>),
    /// `u32` column.
    U32(RawScalar<'a>),
    /// `i64` column.
    I64(RawScalar<'a>),
    /// `u64` column.
    U64(RawScalar<'a>),
    /// `f32` column.
    F32(RawScalar<'a>),
    /// `f64` column.
    F64(RawScalar<'a>),
    /// String column.
    Str(RawStrings<'a>),
    /// Shared-dictionary column.
    SharedDict(RawSharedDict<'a>),
}
143
/// Owned counterpart of [`RawProperty`], with the same variants built from the `Encoded*`
/// payload types.
///
/// All scalar variants share the [`EncodedScalar`] payload, so the variant itself is the
/// only record of the value type.
#[derive(Debug, Clone, PartialEq)]
pub enum EncodedProperty {
    /// Boolean column.
    Bool(EncodedScalar),
    /// `i8` column.
    I8(EncodedScalar),
    /// `u8` column.
    U8(EncodedScalar),
    /// `i32` column.
    I32(EncodedScalar),
    /// `u32` column.
    U32(EncodedScalar),
    /// `i64` column.
    I64(EncodedScalar),
    /// `u64` column.
    U64(EncodedScalar),
    /// `f32` column.
    F32(EncodedScalar),
    /// `f64` column.
    F64(EncodedScalar),
    /// String column.
    Str(EncodedStrings),
    /// Shared-dictionary column.
    SharedDict(EncodedSharedDict),
}
159
/// A property column with typed values, tagged with its value type.
///
/// Has the same variant set as [`RawProperty`], but each scalar variant carries a
/// [`ParsedScalar`] of the matching Rust type.
///
/// * `strum::IntoStaticStr` with `serialize_all = "snake_case"` provides a static name per
///   variant (e.g. `SharedDict` becomes `"shared_dict"`).
/// * `enum_dispatch(Analyze)` forwards the `Analyze` trait to the variant payloads.
// NOTE(review): unlike the sibling enums, `Debug` is not derived here; presumably it is
// implemented by hand elsewhere. Confirm before adding a derive.
#[derive(Clone, PartialEq, strum::IntoStaticStr)]
#[strum(serialize_all = "snake_case")]
#[enum_dispatch(Analyze)]
pub enum ParsedProperty<'a> {
    /// Boolean column.
    Bool(ParsedScalar<'a, bool>),
    /// `i8` column.
    I8(ParsedScalar<'a, i8>),
    /// `u8` column.
    U8(ParsedScalar<'a, u8>),
    /// `i32` column.
    I32(ParsedScalar<'a, i32>),
    /// `u32` column.
    U32(ParsedScalar<'a, u32>),
    /// `i64` column.
    I64(ParsedScalar<'a, i64>),
    /// `u64` column.
    U64(ParsedScalar<'a, u64>),
    /// `f32` column.
    F32(ParsedScalar<'a, f32>),
    /// `f64` column.
    F64(ParsedScalar<'a, f64>),
    /// String column.
    Str(ParsedStrings<'a>),
    /// Shared-dictionary column.
    SharedDict(ParsedSharedDict<'a>),
}
177
/// Fully owned property column with typed values, tagged with its value type.
///
/// Has the same variant set as [`ParsedProperty`], but the payloads (`Staged*`) own their
/// strings instead of borrowing them, so there is no lifetime parameter.
/// Presumably this is the input form handed to the encoders (confirm against callers).
///
/// `strum::IntoStaticStr` with `serialize_all = "snake_case"` provides a static name per
/// variant (e.g. `SharedDict` becomes `"shared_dict"`).
#[derive(Debug, Clone, PartialEq, strum::IntoStaticStr)]
#[strum(serialize_all = "snake_case")]
pub enum StagedProperty {
    /// Boolean column.
    Bool(StagedScalar<bool>),
    /// `i8` column.
    I8(StagedScalar<i8>),
    /// `u8` column.
    U8(StagedScalar<u8>),
    /// `i32` column.
    I32(StagedScalar<i32>),
    /// `u32` column.
    U32(StagedScalar<u32>),
    /// `i64` column.
    I64(StagedScalar<i64>),
    /// `u64` column.
    U64(StagedScalar<u64>),
    /// `f32` column.
    F32(StagedScalar<f32>),
    /// `f64` column.
    F64(StagedScalar<f64>),
    /// String column.
    Str(StagedStrings),
    /// Shared-dictionary column.
    SharedDict(StagedSharedDict),
}
201
/// Scalar property column with typed values and a borrowed name.
///
/// `arbitrary::Arbitrary` is derived only outside of test builds and only when the
/// `arbitrary` feature is enabled.
// NOTE(review): `Debug` is not derived here; presumably it is implemented by hand
// elsewhere. Confirm before adding a derive.
#[derive(Clone, PartialEq)]
#[cfg_attr(all(not(test), feature = "arbitrary"), derive(arbitrary::Arbitrary))]
pub struct ParsedScalar<'a, T: Copy + PartialEq> {
    /// Property name, borrowed for `'a`.
    pub name: &'a str,
    /// One optional value per entry; presumably `None` marks an absent value
    /// (confirm against the decoder).
    pub values: Vec<Option<T>>,
}
208
/// One child of a [`ParsedSharedDict`].
///
/// `arbitrary::Arbitrary` is derived only outside of test builds and only when the
/// `arbitrary` feature is enabled.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(all(not(test), feature = "arbitrary"), derive(arbitrary::Arbitrary))]
pub struct ParsedSharedDictItem<'a> {
    /// Name suffix of this child; presumably appended to the parent's `prefix` to form
    /// the full property name (confirm against callers).
    pub suffix: &'a str,
    /// One pair of `i32` per entry.
    // NOTE(review): presumably (start, end) offsets into the parent's `data`; how absent
    // values are represented is not visible here. TODO confirm.
    pub ranges: Vec<(i32, i32)>,
}
222
/// String property column; payload of [`ParsedProperty::Str`].
///
/// All string bytes live in one `data` buffer, accompanied by a `lengths` vector.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedStrings<'a> {
    /// Property name, borrowed for `'a`.
    pub name: &'a str,
    /// Per-entry length information.
    // NOTE(review): exact semantics (plain lengths vs. cumulative offsets, encoding of
    // absent values) are not visible here. TODO confirm against the decoder.
    pub lengths: Vec<i32>,
    /// String data, either borrowed for `'a` or owned.
    pub data: Cow<'a, str>,
}
244
/// [`EncDec`] specialised to [`RawSharedDictItem`] and [`ParsedSharedDictItem`].
// NOTE(review): presumably "an item that is either still raw or already parsed";
// confirm against the definition of `EncDec`.
pub type SharedDictItem<'a> = EncDec<RawSharedDictItem<'a>, ParsedSharedDictItem<'a>>;
247
/// Shared-dictionary property; payload of [`ParsedProperty::SharedDict`].
///
/// Holds one string buffer together with a list of child items.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedSharedDict<'a> {
    /// Name prefix; presumably combined with each item's `suffix` (confirm against callers).
    pub prefix: &'a str,
    /// Dictionary string data, either borrowed for `'a` or owned.
    pub data: Cow<'a, str>,
    /// Child items of this property.
    pub items: Vec<ParsedSharedDictItem<'a>>,
}
255
/// Optional vector of presence flags.
///
/// The derived `Default` yields `ParsedPresence(None)`.
/// Presumably `None` means "no presence stream, every value present" (TODO confirm).
///
/// `arbitrary::Arbitrary` is derived only outside of test builds and only when the
/// `arbitrary` feature is enabled.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[cfg_attr(all(not(test), feature = "arbitrary"), derive(arbitrary::Arbitrary))]
pub struct ParsedPresence(pub Option<Vec<bool>>);
259
/// Two-state setting stored in the `presence` field of [`ScalarEncoder`] and
/// [`SharedDictItemEncoder`].
///
/// Presumably selects whether an encoder emits a presence stream (confirm against the
/// encoder implementation). `strum::EnumIter` allows iterating over both variants.
///
/// `arbitrary::Arbitrary` is derived only outside of test builds and only when the
/// `arbitrary` feature is enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq, strum::EnumIter)]
#[cfg_attr(all(not(test), feature = "arbitrary"), derive(arbitrary::Arbitrary))]
pub enum PresenceStream {
    /// Presence stream is present.
    Present,
    /// Presence stream is absent.
    Absent,
}
268
/// One child of a [`RawSharedDict`], in raw form.
///
/// Carries its own name, presence and data stream; the dictionary streams live on the
/// parent.
#[derive(Clone, Debug, PartialEq)]
pub struct RawSharedDictItem<'a> {
    /// Item name, borrowed for `'a`.
    pub name: &'a str,
    /// Optional presence stream (see [`RawPresence`]).
    pub presence: RawPresence<'a>,
    /// Data stream of this item; presumably references into the parent's dictionary
    /// (confirm against the decoder).
    pub data: RawStream<'a>,
}
276
/// Owned counterpart of [`RawSharedDictItem`]; element type of
/// [`EncodedSharedDict::children`].
#[derive(Clone, Debug, PartialEq)]
pub struct EncodedSharedDictItem {
    /// Item name.
    pub name: EncodedName,
    /// Optional presence stream (see [`EncodedPresence`]).
    pub presence: EncodedPresence,
    /// Data stream of this item.
    pub data: EncodedStream,
}
284
/// Plain string data in raw form: a `lengths` stream plus a `data` stream.
#[derive(Debug, Clone, PartialEq)]
pub struct RawPlainData<'a> {
    /// Stream of string lengths.
    pub lengths: RawStream<'a>,
    /// Stream of string data.
    pub data: RawStream<'a>,
}
291
/// Owned counterpart of [`RawPlainData`]: a `lengths` stream plus a `data` stream.
#[derive(Debug, Clone, PartialEq)]
pub struct EncodedPlainData {
    /// Stream of string lengths.
    pub lengths: EncodedStream,
    /// Stream of string data.
    pub data: EncodedStream,
}
298
/// FSST string data in raw form, made up of four streams.
// NOTE(review): presumably `symbol_lengths`/`symbol_table` describe the FSST symbol
// table and `lengths`/`corpus` the compressed strings; confirm against the FSST decoder.
#[derive(Debug, Clone, PartialEq)]
pub struct RawFsstData<'a> {
    /// Stream of symbol lengths.
    pub symbol_lengths: RawStream<'a>,
    /// Stream holding the symbol table.
    pub symbol_table: RawStream<'a>,
    /// Stream of string lengths.
    pub lengths: RawStream<'a>,
    /// Stream holding the corpus.
    pub corpus: RawStream<'a>,
}
307
/// Owned counterpart of [`RawFsstData`], with the same four streams.
#[derive(Debug, Clone, PartialEq)]
pub struct EncodedFsstData {
    /// Stream of symbol lengths.
    pub symbol_lengths: EncodedStream,
    /// Stream holding the symbol table.
    pub symbol_table: EncodedStream,
    /// Stream of string lengths.
    pub lengths: EncodedStream,
    /// Stream holding the corpus.
    pub corpus: EncodedStream,
}
316
/// Optional raw presence stream.
///
/// The derived `Default` yields `RawPresence(None)`.
/// Presumably `None` means that no presence stream was stored (TODO confirm).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct RawPresence<'a>(pub Option<RawStream<'a>>);
320
/// Owned counterpart of [`RawPresence`]: an optional encoded presence stream.
///
/// The derived `Default` yields `EncodedPresence(None)`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct EncodedPresence(pub Option<EncodedStream>);
324
/// Encoder configuration for one property: either scalar or shared-dictionary.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PropertyEncoder {
    /// Configuration for a scalar property.
    Scalar(ScalarEncoder),
    /// Configuration for a shared-dictionary property.
    SharedDict(SharedDictEncoder),
}
334
/// Encoder configuration for a scalar property: a presence setting plus a value encoder.
///
/// `arbitrary::Arbitrary` is derived only outside of test builds and only when the
/// `arbitrary` feature is enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(all(not(test), feature = "arbitrary"), derive(arbitrary::Arbitrary))]
pub struct ScalarEncoder {
    /// Presence-stream setting.
    pub presence: PresenceStream,
    /// How the values themselves are encoded.
    pub value: ScalarValueEncoder,
}
342
/// Value encoder of a [`ScalarEncoder`], by value family.
///
/// Integer and string values carry a nested encoder configuration; float and bool values
/// have none.
///
/// `strum::IntoStaticStr` with `serialize_all = "snake_case"` provides a static,
/// snake-case name per variant. `arbitrary::Arbitrary` is derived only outside of test
/// builds and only when the `arbitrary` feature is enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq, strum::IntoStaticStr)]
#[strum(serialize_all = "snake_case")]
#[cfg_attr(all(not(test), feature = "arbitrary"), derive(arbitrary::Arbitrary))]
pub enum ScalarValueEncoder {
    /// Integer values, with the given integer encoder.
    Int(IntEncoder),
    /// String values, with the given string encoder.
    String(StrEncoder),
    /// Floating-point values.
    Float,
    /// Boolean values.
    Bool,
}
353
/// Encoder configuration for one child of a shared-dictionary property; element type of
/// [`SharedDictEncoder::items`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SharedDictItemEncoder {
    /// Presence-stream setting.
    pub presence: PresenceStream,
    /// Integer encoder used for this item's offsets.
    pub offsets: IntEncoder,
}
362
/// Encoder configuration for a shared-dictionary property: one string encoder for the
/// dictionary plus one configuration per child item.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SharedDictEncoder {
    /// String encoder used for the dictionary.
    pub dict_encoder: StrEncoder,
    /// Per-item encoder configurations.
    // NOTE(review): presumably must line up one-to-one with the property's items;
    // confirm against the encoder implementation.
    pub items: Vec<SharedDictItemEncoder>,
}
371
/// String encoder configuration: plain or FSST.
///
/// In test builds `proptest_derive::Arbitrary` is derived; outside of test builds
/// `arbitrary::Arbitrary` is derived when the `arbitrary` feature is enabled.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[cfg_attr(all(not(test), feature = "arbitrary"), derive(arbitrary::Arbitrary))]
pub enum StrEncoder {
    /// Plain strings; `string_lengths` is the integer encoder for the string lengths.
    Plain { string_lengths: IntEncoder },
    /// FSST strings, configured by the given [`FsstStrEncoder`].
    Fsst(FsstStrEncoder),
}
379
/// Owned counterpart of [`ParsedScalar`]: the same fields, with an owned `String` name.
///
/// Payload of the scalar variants of [`StagedProperty`]. In test builds
/// `proptest_derive::Arbitrary` is derived.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub struct StagedScalar<T: Copy + PartialEq> {
    /// Property name.
    pub name: String,
    /// One optional value per entry; presumably `None` marks an absent value
    /// (confirm against the encoder).
    pub values: Vec<Option<T>>,
}
389
/// Owned counterpart of [`ParsedStrings`]; payload of [`StagedProperty::Str`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StagedStrings {
    /// Property name.
    pub name: String,
    /// Per-entry length information.
    // NOTE(review): exact semantics (plain lengths vs. cumulative offsets, encoding of
    // absent values) are not visible here. TODO confirm against the encoder.
    pub lengths: Vec<i32>,
    /// String data in a single owned buffer.
    pub data: String,
}
398
/// Owned counterpart of [`ParsedSharedDictItem`]; element type of
/// [`StagedSharedDict::items`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StagedSharedDictItem {
    /// Name suffix of this child; presumably appended to the parent's `prefix`
    /// (confirm against callers).
    pub suffix: String,
    /// One pair of `i32` per entry.
    // NOTE(review): presumably (start, end) offsets into the parent's `data`. TODO confirm.
    pub ranges: Vec<(i32, i32)>,
}
406
/// Owned counterpart of [`ParsedSharedDict`]; payload of [`StagedProperty::SharedDict`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StagedSharedDict {
    /// Name prefix; presumably combined with each item's `suffix` (confirm against callers).
    pub prefix: String,
    /// Dictionary string data in a single owned buffer.
    pub data: String,
    /// Child items of this property.
    pub items: Vec<StagedSharedDictItem>,
}