timeseries_table_core/coverage/
layout.rs1use std::path::PathBuf;
12
13use snafu::Snafu;
14
15use crate::metadata::table_metadata::TimeBucket;
16
/// Root directory that every coverage path built in this module starts with.
// NOTE(review): presumably relative to the table root — confirm against callers.
pub const COVERAGE_ROOT_DIR: &str = "_coverage";
/// Directory for per-segment coverage files (`COVERAGE_ROOT_DIR` joined with `segments`).
pub const SEGMENT_COVERAGE_DIR: &str = "_coverage/segments";
/// Directory for table snapshot coverage files (`COVERAGE_ROOT_DIR` joined with `table`).
pub const TABLE_SNAPSHOT_DIR: &str = "_coverage/table";
/// File extension, without the leading dot, appended to every coverage file name.
// NOTE(review): "roar" presumably denotes a Roaring-bitmap serialization — confirm.
pub const COVERAGE_EXT: &str = "roar";
25
/// Errors reported by the coverage layout helpers in this module.
#[derive(Debug, Snafu)]
pub enum CoverageLayoutError {
    /// The supplied id was rejected by `validate_coverage_id`.
    #[snafu(display("Invalid coverage id: {coverage_id}"))]
    InvalidCoverageId {
        /// Copy of the rejected id, included in the display message.
        coverage_id: String,
    },
}
36
37pub fn validate_coverage_id(coverage_id: &str) -> Result<(), CoverageLayoutError> {
44 if coverage_id.is_empty() || coverage_id.len() > 128 {
45 return Err(CoverageLayoutError::InvalidCoverageId {
46 coverage_id: coverage_id.to_string(),
47 });
48 }
49
50 if !coverage_id.chars().any(|c| c.is_ascii_alphanumeric()) {
52 return Err(CoverageLayoutError::InvalidCoverageId {
53 coverage_id: coverage_id.to_string(),
54 });
55 }
56
57 if coverage_id.starts_with('.') {
59 return Err(CoverageLayoutError::InvalidCoverageId {
60 coverage_id: coverage_id.to_string(),
61 });
62 }
63
64 if coverage_id.contains('/') || coverage_id.contains('\\') || coverage_id.contains("..") {
66 return Err(CoverageLayoutError::InvalidCoverageId {
67 coverage_id: coverage_id.to_string(),
68 });
69 }
70
71 let ok = coverage_id
73 .chars()
74 .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-'));
75
76 if !ok {
77 return Err(CoverageLayoutError::InvalidCoverageId {
78 coverage_id: coverage_id.to_string(),
79 });
80 }
81
82 Ok(())
83}
84
85pub fn segment_coverage_path(coverage_id: &str) -> Result<PathBuf, CoverageLayoutError> {
87 validate_coverage_id(coverage_id)?;
88 let mut p = PathBuf::from(COVERAGE_ROOT_DIR);
89 p.push("segments");
90 p.push(format!("{coverage_id}.{COVERAGE_EXT}"));
91 Ok(p)
92}
93
94pub fn table_snapshot_path(
96 version: u64,
97 snapshot_id: &str,
98) -> Result<PathBuf, CoverageLayoutError> {
99 validate_coverage_id(snapshot_id)?;
100 let mut p = PathBuf::from(COVERAGE_ROOT_DIR);
101 p.push("table");
102 p.push(format!("{version}-{snapshot_id}.{COVERAGE_EXT}"));
103 Ok(p)
104}
105
106fn coverage_id_v1(
107 domain_prefix: &[u8],
108 output_prefix: &str,
109 bucket_spec: &TimeBucket,
110 time_column: &str,
111 coverage_bytes: &[u8],
112) -> String {
113 let mut h = blake3::Hasher::new();
114
115 h.update(domain_prefix);
117 h.update(b"\0");
118
119 match bucket_spec {
121 TimeBucket::Seconds(n) => {
122 h.update(b"S");
123 h.update(&n.to_le_bytes());
124 }
125 TimeBucket::Minutes(n) => {
126 h.update(b"M");
127 h.update(&n.to_le_bytes());
128 }
129 TimeBucket::Hours(n) => {
130 h.update(b"H");
131 h.update(&n.to_le_bytes());
132 }
133 TimeBucket::Days(n) => {
134 h.update(b"D");
135 h.update(&n.to_le_bytes());
136 }
137 }
138
139 h.update(b"\0");
140 h.update(time_column.as_bytes());
141 h.update(b"\0");
142 h.update(coverage_bytes);
143
144 let hex = h.finalize().to_hex();
145 format!("{output_prefix}-{}", &hex[..32])
146}
147
148pub fn segment_coverage_id_v1(
150 bucket_spec: &TimeBucket,
151 time_column: &str,
152 coverage_bytes: &[u8],
153) -> String {
154 coverage_id_v1(
155 b"segcov-v1",
156 "segcov",
157 bucket_spec,
158 time_column,
159 coverage_bytes,
160 )
161}
162
163pub fn table_coverage_id_v1(
165 bucket_spec: &TimeBucket,
166 time_column: &str,
167 coverage_bytes: &[u8],
168) -> String {
169 coverage_id_v1(
170 b"tblcov-v1",
171 "tblcov",
172 bucket_spec,
173 time_column,
174 coverage_bytes,
175 )
176}
177
// Unit tests for id validation, path construction and id derivation.
//
// Every rejection rule of `validate_coverage_id` has a dedicated test so that
// removing one rule cannot go unnoticed.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn validate_coverage_id_accepts_valid_ids() {
        let long = "a".repeat(128);
        let valid_ids = ["abc", "A_B-1.2", long.as_str()];

        for id in valid_ids {
            validate_coverage_id(id).expect("valid id should pass");
        }
    }

    #[test]
    fn validate_coverage_id_rejects_empty_or_too_long() {
        let too_long = "x".repeat(129);
        assert!(validate_coverage_id("").is_err());
        assert!(validate_coverage_id(&too_long).is_err());
    }

    #[test]
    fn validate_coverage_id_rejects_ids_without_alphanumerics() {
        // These use only permitted punctuation, so the "at least one
        // alphanumeric character" rule is the only one that rejects them.
        for id in ["-", "_", "-_-", "___"] {
            assert!(validate_coverage_id(id).is_err(), "id `{id}` should fail");
        }
    }

    #[test]
    fn validate_coverage_id_rejects_leading_dot() {
        // `.hidden` and `.a` pass every other rule; only the leading-dot
        // check rejects them.
        for id in [".hidden", ".a", "."] {
            assert!(validate_coverage_id(id).is_err(), "id `{id}` should fail");
        }
    }

    #[test]
    fn validate_coverage_id_rejects_path_components() {
        for id in ["a/b", "a\\b", "a..b", "..", "../etc"] {
            assert!(validate_coverage_id(id).is_err(), "id `{id}` should fail");
        }
    }

    #[test]
    fn validate_coverage_id_rejects_disallowed_chars() {
        for id in ["space id", "id*", "id@", "id$", "id:"] {
            assert!(validate_coverage_id(id).is_err(), "id `{id}` should fail");
        }
    }

    #[test]
    fn segment_coverage_path_formats_and_validates() {
        let id = "seg-001";
        let path = segment_coverage_path(id).expect("valid id");
        assert_eq!(path, PathBuf::from("_coverage/segments/seg-001.roar"));

        // Invalid ids must be rejected before any path is built.
        assert!(segment_coverage_path("bad/id").is_err());
    }

    #[test]
    fn table_snapshot_path_formats() {
        let path = table_snapshot_path(42, "snap-001").expect("valid snapshot id");
        assert_eq!(path, PathBuf::from("_coverage/table/42-snap-001.roar"));
    }

    #[test]
    fn table_snapshot_path_rejects_invalid_snapshot_id() {
        for id in ["", "bad/id", "..", ".hidden"] {
            assert!(
                table_snapshot_path(1, id).is_err(),
                "snapshot id `{id}` should fail"
            );
        }
    }

    #[test]
    fn segment_coverage_id_is_deterministic_and_valid() {
        let bucket = TimeBucket::Minutes(1);
        let time_col = "ts";
        let bytes = b"bitmap-bytes";

        let id1 = segment_coverage_id_v1(&bucket, time_col, bytes);
        let id2 = segment_coverage_id_v1(&bucket, time_col, bytes);

        assert_eq!(id1, id2, "same inputs must produce stable id");
        assert!(id1.starts_with("segcov-"));
        assert_eq!(id1.len(), "segcov-".len() + 32, "prefix + 32 hex chars");
        validate_coverage_id(&id1).expect("derived id should be valid");
    }

    #[test]
    fn segment_coverage_id_changes_with_inputs() {
        let bytes = b"bytes";

        let base = segment_coverage_id_v1(&TimeBucket::Seconds(5), "ts", bytes);
        let different_bucket = segment_coverage_id_v1(&TimeBucket::Hours(5), "ts", bytes);
        let different_column =
            segment_coverage_id_v1(&TimeBucket::Seconds(5), "event_time", bytes);
        let different_bytes = segment_coverage_id_v1(&TimeBucket::Seconds(5), "ts", b"other");

        assert_ne!(base, different_bucket, "bucket spec should affect id");
        assert_ne!(base, different_column, "time column should affect id");
        assert_ne!(base, different_bytes, "coverage bytes should affect id");
    }

    #[test]
    fn table_coverage_id_is_deterministic_and_valid() {
        let bucket = TimeBucket::Hours(1);
        let time_col = "ts";
        let bytes = b"table-bitmap";

        let id1 = table_coverage_id_v1(&bucket, time_col, bytes);
        let id2 = table_coverage_id_v1(&bucket, time_col, bytes);

        assert_eq!(id1, id2, "same inputs must produce stable id");
        assert!(id1.starts_with("tblcov-"));
        assert_eq!(id1.len(), "tblcov-".len() + 32, "prefix + 32 hex chars");
        validate_coverage_id(&id1).expect("derived id should be valid");
    }

    #[test]
    fn table_coverage_id_changes_with_inputs() {
        let bytes = b"bytes";

        let base = table_coverage_id_v1(&TimeBucket::Minutes(15), "ts", bytes);
        let different_bucket = table_coverage_id_v1(&TimeBucket::Days(1), "ts", bytes);
        let different_column =
            table_coverage_id_v1(&TimeBucket::Minutes(15), "event_time", bytes);
        let different_bytes = table_coverage_id_v1(&TimeBucket::Minutes(15), "ts", b"other");

        assert_ne!(base, different_bucket, "bucket spec should affect id");
        assert_ne!(base, different_column, "time column should affect id");
        assert_ne!(base, different_bytes, "coverage bytes should affect id");
    }

    #[test]
    fn segment_and_table_ids_use_separate_hash_domains() {
        let bucket = TimeBucket::Minutes(1);
        let bytes = b"same-bytes";

        let segment_id = segment_coverage_id_v1(&bucket, "ts", bytes);
        let table_id = table_coverage_id_v1(&bucket, "ts", bytes);

        // Compare only the hex suffix: the domain prefix mixed into the hash
        // must change the digest itself, not just the readable id prefix.
        let segment_hex = &segment_id["segcov-".len()..];
        let table_hex = &table_id["tblcov-".len()..];
        assert_ne!(segment_hex, table_hex, "hash domain should affect digest");
    }
}