Skip to main content

journal_engine/
facets.rs

1//! Field facets configuration for indexing
2//!
3//! Facets determine which fields should be indexed when processing journal files.
4
5use journal_index::FieldName;
6use std::hash::Hash;
7use std::sync::Arc;
8
9/// Configuration specifying which fields should be indexed.
10///
11/// Facets are used as part of the cache key for file indexes, since different
12/// field selections produce different indexes.
13///
14/// # Serialization
15///
16/// Facets serializes as a sequence of field name strings. The precomputed hash
17/// is NOT serialized - it is recomputed during deserialization to maintain the
18/// invariant that `precomputed_hash == hash(fields)`.
19#[derive(Debug, Clone)]
20pub struct Facets {
21    fields: Arc<Vec<FieldName>>,
22    precomputed_hash: u64,
23}
24
25impl Hash for Facets {
26    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
27        state.write_u64(self.precomputed_hash);
28    }
29}
30
31impl PartialEq for Facets {
32    fn eq(&self, other: &Self) -> bool {
33        if self.precomputed_hash != other.precomputed_hash {
34            return false;
35        }
36
37        Arc::ptr_eq(&self.fields, &other.fields) || self.fields == other.fields
38    }
39}
40
41impl Eq for Facets {}
42
43impl Facets {
44    fn default_facets() -> Vec<FieldName> {
45        let v: Vec<&str> = vec![
46            "_HOSTNAME",
47            "PRIORITY",
48            "SYSLOG_FACILITY",
49            "ERRNO",
50            "SYSLOG_IDENTIFIER",
51            // "UNIT",
52            "USER_UNIT",
53            "MESSAGE_ID",
54            "_BOOT_ID",
55            "_SYSTEMD_OWNER_UID",
56            "_UID",
57            "OBJECT_SYSTEMD_OWNER_UID",
58            "OBJECT_UID",
59            "_GID",
60            "OBJECT_GID",
61            "_CAP_EFFECTIVE",
62            "_AUDIT_LOGINUID",
63            "OBJECT_AUDIT_LOGINUID",
64            "CODE_FUNC",
65            "CODE_FILE",
66            "_SELINUX_CONTEXT",
67            "_MACHINE_ID",
68            "_SYSTEMD_SLICE",
69            "_EXE",
70            // "_SYSTEMD_UNIT",
71            "_NAMESPACE",
72            "_TRANSPORT",
73            "_RUNTIME_SCOPE",
74            "_STREAM_ID",
75            // "_SYSTEMD_CGROUP",
76            "_COMM",
77            "_SYSTEMD_USER_UNIT",
78            "_SYSTEMD_USER_SLICE",
79            // "_SYSTEMD_SESSION",
80        ];
81
82        v.into_iter().map(FieldName::new_unchecked).collect()
83    }
84
85    pub fn new(facets: &[String]) -> Self {
86        let mut facets = if facets.is_empty() {
87            Self::default_facets()
88        } else {
89            // Parse and validate each facet string into FieldName
90            facets
91                .iter()
92                .filter_map(|s| FieldName::new(s.clone()))
93                .collect()
94        };
95
96        // Sort and deduplicate to get a canonical set of fields
97        facets.sort();
98        facets.dedup();
99
100        use std::hash::Hasher;
101        let mut hasher = std::hash::DefaultHasher::new();
102        // Hash the string representation for consistency
103        for field in &facets {
104            field.as_str().hash(&mut hasher);
105        }
106        let precomputed_hash = hasher.finish();
107
108        Self {
109            fields: Arc::new(facets),
110            precomputed_hash,
111        }
112    }
113
114    /// Returns an iterator over the facet field names
115    pub fn iter(&self) -> impl Iterator<Item = &FieldName> {
116        self.fields.iter()
117    }
118
119    /// Returns the facet fields as a slice
120    pub fn as_slice(&self) -> &[FieldName] {
121        &self.fields
122    }
123
124    /// Returns the number of facet fields
125    pub fn len(&self) -> usize {
126        self.fields.len()
127    }
128
129    /// Returns true if there are no facet fields
130    #[allow(dead_code)]
131    pub fn is_empty(&self) -> bool {
132        self.fields.is_empty()
133    }
134
135    pub fn precomputed_hash(&self) -> u64 {
136        self.precomputed_hash
137    }
138}
139
140impl serde::Serialize for Facets {
141    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
142    where
143        S: serde::Serializer,
144    {
145        // Serialize as a sequence of field name strings.
146        // The precomputed_hash is NOT serialized - it will be recomputed on deserialization.
147        use serde::ser::SerializeSeq;
148        let mut seq = serializer.serialize_seq(Some(self.fields.len()))?;
149        for field in self.fields.iter() {
150            seq.serialize_element(field.as_str())?;
151        }
152        seq.end()
153    }
154}
155
156impl<'de> serde::Deserialize<'de> for Facets {
157    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
158    where
159        D: serde::Deserializer<'de>,
160    {
161        // Deserialize as a sequence of strings, then reconstruct via Facets::new()
162        // which will recompute the hash, maintaining the invariant.
163        let fields: Vec<String> = Vec::deserialize(deserializer)?;
164        Ok(Facets::new(&fields))
165    }
166}