alexandria_tags/
lib.rs

//! Alexandria tagging system
//!
//! The internals of alexandria work with records and search tags.
//! Records are fully encrypted, meaning that searching them with a
//! reasonable amount of work is impossible.  To solve this problem
//! Alexandria uses search tags, which are searched in separately
//! encrypted tag tables.  This way records can be searched via tags,
//! without exposing their data bodies.
9
10use serde::{
11    de::{Error, MapAccess, SeqAccess, Visitor},
12    ser::SerializeStruct,
13    Deserialize, Deserializer, Serialize, Serializer,
14};
15use std::{collections::BTreeSet, fmt};
16
17/// A set of tags where every tag is unique
18///
19/// Simply construct a set via one of the `From` implementations of a
20/// containing type.
21///
22/// ```norun
23/// # use alexandria::data::TagSet;
24/// # use std::collections::BTreeSet;
25/// let _: TagSet = vec![].into();
26/// let _: TagSet = BTreeSet::default().into();
27/// ```
28#[derive(Clone, Debug, Default, Eq, PartialEq, Hash, Serialize, Deserialize)]
29pub struct TagSet(BTreeSet<Tag>);
30
31impl TagSet {
32    pub fn empty() -> Self {
33        Self(Default::default())
34    }
35
36    pub fn insert(&mut self, t: Tag) {
37        self.0.insert(t);
38    }
39
40    /// Merge this and another tagset together into one
41    pub fn merge<T>(self, tags: T) -> Self
42    where
43        T: Into<Self>,
44    {
45        let ts = tags.into();
46        Self(ts.0.into_iter().fold(self.0, |mut set, t| {
47            set.insert(t);
48            set
49        }))
50    }
51
52    pub fn remove(&mut self, t: &Tag) {
53        self.0.remove(t);
54    }
55
56    pub fn contains(&self, t: &Tag) -> bool {
57        self.0.contains(t)
58    }
59
60    #[cfg(test)]
61    #[allow(unused)]
62    pub(crate) fn len(&self) -> usize {
63        self.0.len()
64    }
65
66    /// Any overlap between `self` and `o`
67    pub fn intersect(&self, o: &TagSet) -> bool {
68        o.iter().fold(false, |acc, t| acc || self.0.contains(t))
69    }
70
71    /// A subset where `o` needs to be contained entirely in `self`
72    pub fn subset(&self, o: &TagSet) -> bool {
73        o.iter().fold(true, |acc, t| acc && self.0.contains(t))
74    }
75
76    /// An equality set where `o` and `self` are the same
77    pub fn equality(&self, o: &TagSet) -> bool {
78        self.0 == o.0
79    }
80
81    /// No overlay between `self` and `o`
82    pub fn not(&self, o: &TagSet) -> bool {
83        o.iter().fold(true, |acc, tag| acc && !self.0.contains(tag))
84    }
85
86    /// Return an iterator over the inner collection
87    pub fn iter(&self) -> impl Iterator<Item = &Tag> {
88        self.0.iter()
89    }
90}
91
92impl From<Tag> for TagSet {
93    fn from(t: Tag) -> Self {
94        Self::from(vec![t])
95    }
96}
97
98impl<'tag> From<&'tag Tag> for TagSet {
99    fn from(t: &'tag Tag) -> Self {
100        Self::from(vec![t.clone()])
101    }
102}
103
104impl From<Vec<Tag>> for TagSet {
105    fn from(vec: Vec<Tag>) -> Self {
106        Self(vec.into_iter().fold(BTreeSet::new(), |mut set, tag| {
107            set.insert(tag);
108            set
109        }))
110    }
111}
112
113impl From<&Vec<Tag>> for TagSet {
114    fn from(vec: &Vec<Tag>) -> Self {
115        Self(vec.iter().fold(BTreeSet::new(), |mut set, tag| {
116            set.insert(tag.clone());
117            set
118        }))
119    }
120}
121
122impl From<Vec<&Tag>> for TagSet {
123    fn from(vec: Vec<&Tag>) -> Self {
124        Self(vec.into_iter().fold(BTreeSet::new(), |mut set, tag| {
125            set.insert(tag.clone());
126            set
127        }))
128    }
129}
130
131impl From<BTreeSet<Tag>> for TagSet {
132    fn from(set: BTreeSet<Tag>) -> Self {
133        Self(set)
134    }
135}
136
137impl From<TagSet> for BTreeSet<Tag> {
138    fn from(ts: TagSet) -> Self {
139        ts.0
140    }
141}
142
/// A generic metadata tag
///
/// Searching through message or file payloads can be slow and I/O
/// intensive (especially within the secret store), so all record
/// types have a tag metadata interface.  It's up to the implementor
/// of an application to use tags to search records, create
/// relationships, or build subsets of data.
///
/// Tags can also be serialised to either a human readable or binary
/// format.  To disable this, re-compile alexandria without the
/// `human-tags` or `tag-serialize` flags.
///
/// Tags are compared and ordered by `key` first and `val` second.
// NOTE(review): the serde impls for `Tag` in this file carry no
// feature gate -- confirm that the flags named above still exist.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tag {
    /// A string key for a tag
    pub key: String,
    /// Some binary data that is up to a service to interpret
    pub val: Vec<u8>,
}
161
162impl Tag {
163    /// Create a new MsgTag with key and value
164    pub fn new<K, I>(key: K, val: I) -> Self
165    where
166        K: Into<String>,
167        I: IntoIterator<Item = u8>,
168    {
169        Self {
170            key: key.into(),
171            val: val.into_iter().collect(),
172        }
173    }
174
175    /// Create a tag that consists of only a key, with no value
176    pub fn empty<K>(key: K) -> Self
177    where
178        K: Into<String>,
179    {
180        Self::new(key, vec![])
181    }
182}
183
/// Byte vector wrapper with its own serde representation
///
/// The `Serialize` and `Deserialize` impls for this type in this file
/// use a hex string for human readable formats and raw bytes
/// otherwise.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct HumanVec(Vec<u8>);
186
187impl Serialize for HumanVec {
188    fn serialize<S>(&self, ser: S) -> Result<S::Ok, S::Error>
189    where
190        S: Serializer,
191    {
192        if ser.is_human_readable() {
193            ser.serialize_str(
194                &hex::encode_upper(&self.0)
195                    .as_bytes()
196                    .chunks(4)
197                    .map(std::str::from_utf8)
198                    .collect::<Result<String, _>>()
199                    .unwrap(),
200            )
201        } else {
202            ser.serialize_bytes(&self.0)
203        }
204    }
205}
206
207impl<'de> Deserialize<'de> for HumanVec {
208    fn deserialize<D>(der: D) -> Result<Self, D::Error>
209    where
210        D: Deserializer<'de>,
211    {
212        struct HumanVecVis;
213
214        impl HumanVecVis {
215            fn from_str<E: Error>(s: &str) -> Result<HumanVec, E> {
216                Self::from_bytes(&hex::decode(s).map_err(|e| E::custom(e))?)
217            }
218
219            fn from_bytes<E: Error, V: AsRef<[u8]>>(v: V) -> Result<HumanVec, E> {
220                let v = v.as_ref();
221                Ok(HumanVec(v.iter().cloned().collect()))
222            }
223        }
224
225        impl<'de> Visitor<'de> for HumanVecVis {
226            type Value = HumanVec;
227
228            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
229                write!(f, "A byte array or a hex string encoded byte array",)
230            }
231
232            fn visit_borrowed_str<E: Error>(self, v: &'de str) -> Result<Self::Value, E> {
233                Self::from_str(v)
234            }
235
236            fn visit_string<E: Error>(self, v: String) -> Result<Self::Value, E> {
237                Self::from_str(&v)
238            }
239
240            fn visit_borrowed_bytes<E: Error>(self, v: &'de [u8]) -> Result<Self::Value, E> {
241                Self::from_bytes(v)
242            }
243
244            fn visit_byte_buf<E: Error>(self, v: Vec<u8>) -> Result<Self::Value, E> {
245                Self::from_bytes(v)
246            }
247
248            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
249            where
250                A: SeqAccess<'de>,
251            {
252                let mut v = Vec::new();
253                while let Some(b) = seq.next_element::<u8>()? {
254                    v.push(b);
255                }
256
257                Self::from_bytes(v)
258            }
259        }
260
261        if der.is_human_readable() {
262            der.deserialize_str(HumanVecVis)
263        } else {
264            der.deserialize_bytes(HumanVecVis)
265        }
266    }
267}
268
269impl Serialize for Tag {
270    fn serialize<S>(&self, ser: S) -> Result<S::Ok, S::Error>
271    where
272        S: Serializer,
273    {
274        dbg!();
275        let mut state = ser.serialize_struct("Tag", 2)?;
276        state.serialize_field("key", &self.key)?;
277        state.serialize_field("val", &HumanVec(self.val.clone()))?;
278        state.end()
279    }
280}
281
282impl<'de> Deserialize<'de> for Tag {
283    fn deserialize<D>(der: D) -> Result<Self, D::Error>
284    where
285        D: Deserializer<'de>,
286    {
287        /// Responsible for deserialising hex-encoded payloads
288        ///
289        /// This visitor is called when the deserialiser is working
290        /// for a human readable format, such as json.
291        struct TagVisitor;
292
293        impl<'de> Visitor<'de> for TagVisitor {
294            type Value = Tag;
295
296            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
297            where
298                A: SeqAccess<'de>,
299            {
300                let key: String = seq
301                    .next_element()?
302                    .ok_or_else(|| Error::invalid_length(0, &self))?;
303
304                let hvec: HumanVec = seq
305                    .next_element()?
306                    .ok_or_else(|| Error::invalid_length(0, &self))?;
307                let val: Vec<u8> = hvec.0;
308
309                Ok(Tag { key, val })
310            }
311
312            // json will try to deserialize structs as maps
313            fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
314            where
315                A: MapAccess<'de>,
316            {
317                let mut key: Option<String> = None;
318                let mut value: Option<HumanVec> = None;
319
320                while let Some(k) = map.next_key::<String>()? {
321                    match k.as_str() {
322                        "key" => {
323                            if key.is_some() {
324                                return Err(Error::duplicate_field("key"));
325                            }
326                            key = Some(map.next_value()?);
327                        }
328                        "val" => {
329                            if value.is_some() {
330                                return Err(Error::duplicate_field("val"));
331                            }
332                            value = Some(map.next_value()?);
333                        }
334                        f => {
335                            static FIELDS: &'static [&'static str] = &["key", "val"];
336                            return Err(Error::unknown_field(f, FIELDS));
337                        }
338                    }
339                }
340
341                let key = key.ok_or_else(|| Error::missing_field("key"))?;
342                let value = value.ok_or_else(|| Error::missing_field("val"))?;
343
344                Ok(Tag { key, val: value.0 })
345            }
346
347            fn expecting(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
348                fmt.write_str("struct Tag { key, val }")
349            }
350        }
351
352        der.deserialize_struct("Tag", &["key", "val"], TagVisitor)
353    }
354}
355
/// A tag serialised to json carries its value as an upper-case hex string
#[test]
fn serialize_tag_json() {
    // 172, 171 == 0xAC, 0xAB
    let tag = Tag::new("blorp", vec![172, 171]);

    let encoded = serde_json::to_string(&tag).unwrap();
    assert_eq!(encoded, r#"{"key":"blorp","val":"ACAB"}"#);
}
367
/// A tag serialised to bincode carries its value as raw bytes
#[test]
fn serialize_tag_bincode() {
    let tag = Tag::new("blorp", vec![172, 171]);

    // Length-prefixed key ("blorp") followed by the length-prefixed value
    let expected: [u8; 23] = [
        5, 0, 0, 0, 0, 0, 0, 0, 98, 108, 111, 114, 112, 2, 0, 0, 0, 0, 0, 0, 0, 172, 171,
    ];

    let encoded = bincode::serialize(&tag).unwrap();
    assert_eq!(encoded.as_slice(), &expected);
}
382
/// A json object with a hex string value deserialises into a tag
#[test]
fn deserialize_tag_json() {
    let value = serde_json::json!({ "key": "blorp", "val": "ACAB" });
    let decoded: Tag = serde_json::from_value(value).unwrap();

    let expected = Tag::new("blorp", vec![172, 171]);
    assert_eq!(decoded, expected);
}
400
/// A bincode payload with a raw byte value deserialises into a tag
#[test]
fn deserialize_tag_bincode() {
    // Length-prefixed key ("blorp") followed by the length-prefixed value
    let encoded: [u8; 23] = [
        5, 0, 0, 0, 0, 0, 0, 0, 98, 108, 111, 114, 112, 2, 0, 0, 0, 0, 0, 0, 0, 172, 171,
    ];

    let decoded: Tag = bincode::deserialize(&encoded).unwrap();

    let expected = Tag::new("blorp", vec![172, 171]);
    assert_eq!(decoded, expected);
}
418
/// `{a}` is entirely contained in `{a, b}`
#[test]
fn subset_1() {
    let whole: TagSet = vec![Tag::empty("a"), Tag::empty("b")].into();
    let sub: TagSet = vec![Tag::empty("a")].into();

    assert!(whole.subset(&sub));
}
425
/// `{b, c}` only partially overlaps `{a, b}`, so it is not a subset
#[test]
fn subset_2() {
    let a: TagSet = vec![Tag::empty("a"), Tag::empty("b")].into();
    let b: TagSet = vec![Tag::empty("b"), Tag::empty("c")].into();

    assert!(!a.subset(&b));
}
432
/// `{a, b}` and `{b, c}` share the tag `b`
#[test]
fn intersect_1() {
    let a: TagSet = vec![Tag::empty("a"), Tag::empty("b")].into();
    let b: TagSet = vec![Tag::empty("b"), Tag::empty("c")].into();

    assert!(a.intersect(&b));
}
439
/// `{a, b, c}` and `{d, e}` have no tag in common
#[test]
fn intersect_4() {
    let a: TagSet = vec![Tag::empty("a"), Tag::empty("b"), Tag::empty("c")].into();
    let b: TagSet = vec![Tag::empty("d"), Tag::empty("e")].into();

    assert!(!a.intersect(&b));
}
446
/// `not` holds for `{a, b, c}` and `{d, e}` because they share no tag
#[test]
fn not_1() {
    let a: TagSet = vec![Tag::empty("a"), Tag::empty("b"), Tag::empty("c")].into();
    let b: TagSet = vec![Tag::empty("d"), Tag::empty("e")].into();

    assert!(a.not(&b));
}