dm_database_parser_sqllog/
tools.rs1use once_cell::sync::Lazy;
6
/// Exact byte length of a `YYYY-MM-DD HH:MM:SS.mmm` timestamp.
const TIMESTAMP_LENGTH: usize = 23;

/// Shortest line that can begin a record: the timestamp plus the two bytes
/// (a space and an opening parenthesis) that are inspected right after it.
const MIN_LINE_LENGTH: usize = TIMESTAMP_LENGTH + 2;

/// `(byte offset, expected byte)` for every separator inside the timestamp.
const TIMESTAMP_SEPARATOR_POSITIONS: [(usize, u8); 6] =
    [(4, b'-'), (7, b'-'), (10, b' '), (13, b':'), (16, b':'), (19, b'.')];

/// Byte offsets inside the timestamp that must hold an ASCII digit.
const TIMESTAMP_DIGIT_POSITIONS: [usize; 17] = [
    0, 1, 2, 3, // year
    5, 6, // month
    8, 9, // day
    11, 12, // hour
    14, 15, // minute
    17, 18, // second
    20, 21, 22, // millisecond
];
20
21const META_START_INDEX: usize = 25;
23const REQUIRED_META_FIELDS: usize = 7;
24const META_WITH_IP_FIELDS: usize = 8;
25
26static META_FIELD_PREFIXES: Lazy<[&'static str; 8]> = Lazy::new(|| {
28 [
29 "EP[",
30 "sess:",
31 "thrd:",
32 "user:",
33 "trxid:",
34 "stmt:",
35 "appname:",
36 "ip:::ffff:",
37 ]
38});
39
/// Byte expected directly after the timestamp.
const SPACE_BYTE: u8 = b' ';

/// Byte that opens the metadata section.
const OPEN_PAREN_BYTE: u8 = b'(';

/// Character that closes the metadata section.
const CLOSE_PAREN_CHAR: char = ')';
44
45#[inline(always)]
69pub fn is_ts_millis_bytes(bytes: &[u8]) -> bool {
70 if bytes.len() != TIMESTAMP_LENGTH {
71 return false;
72 }
73
74 for &(pos, expected) in &TIMESTAMP_SEPARATOR_POSITIONS {
76 if bytes[pos] != expected {
77 return false;
78 }
79 }
80
81 for &i in &TIMESTAMP_DIGIT_POSITIONS {
83 if !bytes[i].is_ascii_digit() {
84 return false;
85 }
86 }
87
88 true
89}
90
91pub fn is_record_start_line(line: &str) -> bool {
125 let bytes = line.as_bytes();
126 if bytes.len() < MIN_LINE_LENGTH {
127 return false;
128 }
129
130 if !is_ts_millis_bytes(&bytes[0..TIMESTAMP_LENGTH]) {
132 return false;
133 }
134
135 if bytes[23] != SPACE_BYTE || bytes[24] != OPEN_PAREN_BYTE {
137 return false;
138 }
139
140 let closing_paren_index = match line.find(CLOSE_PAREN_CHAR) {
142 Some(index) => index,
143 None => return false,
144 };
145
146 let meta_part = &line[META_START_INDEX..closing_paren_index];
148
149 let mut split_iter = meta_part.split(' ');
151 let mut field_count = 0;
152
153 for prefix in META_FIELD_PREFIXES.iter().take(REQUIRED_META_FIELDS) {
155 match split_iter.next() {
156 Some(field) if field.contains(prefix) => {
157 field_count += 1;
158 }
159 _ => return false,
160 }
161 }
162
163 if let Some(ip_field) = split_iter.next() {
165 if !ip_field.contains(META_FIELD_PREFIXES[REQUIRED_META_FIELDS]) {
166 return false;
167 }
168 field_count += 1;
169
170 if split_iter.next().is_some() {
172 return false;
173 }
174 }
175
176 field_count == REQUIRED_META_FIELDS || field_count == META_WITH_IP_FIELDS
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// Tests for `is_ts_millis_bytes`.
    mod timestamp_tests {
        use super::*;

        #[test]
        fn valid_timestamps() {
            let valid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56.789",
                b"2000-01-01 00:00:00.000",
                b"2099-12-31 23:59:59.999",
                b"2024-02-29 12:34:56.789",
            ];
            for ts in valid_cases {
                assert!(is_ts_millis_bytes(ts), "Failed for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_length() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56",
                b"2024-06-15 12:34:56.7",
                b"2024-06-15 12:34:56.7890",
                b"",
                b"2024",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_separator() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56,789",
                b"2024/06/15 12:34:56.789",
                b"2024-06-15T12:34:56.789",
                b"2024-06-15-12:34:56.789",
                b"2024-06-15 12-34-56.789",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn non_digits() {
            let invalid_cases: &[&[u8]] = &[
                b"202a-06-15 12:34:56.789",
                b"2024-0b-15 12:34:56.789",
                b"2024-06-1c 12:34:56.789",
                b"2024-06-15 1d:34:56.789",
                b"2024-06-15 12:3e:56.789",
                b"2024-06-15 12:34:5f.789",
                b"2024-06-15 12:34:56.78g",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn special_chars() {
            assert!(!is_ts_millis_bytes(b"2024-06-15 12:34:56.\x00\x00\x00"));
            assert!(!is_ts_millis_bytes(b"\x002024-06-15 12:34:56.789"));
        }
    }

    /// Tests for `is_record_start_line`.
    mod record_start_line_tests {
        use super::*;

        #[test]
        fn valid_complete_line() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname: ip:::ffff:10.3.100.68) [SEL] select 1 from dual EXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 289655178.";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn valid_without_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname:) [SEL] select 1 from dual";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn minimal_valid() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn too_short() {
            let short_lines = [
                "2025-08-12 10:57:09.548",
                "2025-08-12 10:57:09.548 (",
                "",
                "short",
            ];
            for line in &short_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn invalid_timestamp() {
            let line = "2025-08-12 10:57:09,548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn format_errors() {
            let invalid_lines = [
                // No space between the timestamp and the parenthesis.
                "2025-08-12 10:57:09.548(EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // Opening parenthesis missing.
                "2025-08-12 10:57:09.548 EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // Closing parenthesis missing.
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app body",
            ];
            for line in &invalid_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn insufficient_fields() {
            let line =
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn wrong_field_order() {
            let line = "2025-08-12 10:57:09.548 (sess:123 EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn missing_required_fields() {
            let test_cases = [
                (
                    "2025-08-12 10:57:09.548 (sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "EP",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "sess",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 user:alice trxid:789 stmt:999 appname:app) body",
                    "thrd",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 trxid:789 stmt:999 appname:app) body",
                    "user",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice stmt:999 appname:app) body",
                    "trxid",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 appname:app) body",
                    "stmt",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999) body",
                    "appname",
                ),
            ];
            for (line, field) in &test_cases {
                assert!(
                    !is_record_start_line(line),
                    "Should fail when missing {} field",
                    field
                );
            }
        }

        #[test]
        fn with_valid_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:::ffff:192.168.1.100) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn with_invalid_ip_format() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:192.168.1.100) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn complex_field_values() {
            let line = "2025-08-12 10:57:09.548 (EP[123] sess:0xABCD1234 thrd:9999999 user:USER_WITH_UNDERSCORES trxid:12345678 stmt:0xFFFFFFFF appname:app-name-with-dashes ip:::ffff:10.20.30.40) SELECT * FROM table";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn empty_appname() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn continuation_line() {
            let continuation = " SELECT * FROM users WHERE id = 1";
            assert!(!is_record_start_line(continuation));
        }

        #[test]
        fn double_space_in_meta() {
            // Two spaces between `EP[0]` and `sess:` produce an empty field
            // when the metadata is split on single spaces, so the line must
            // be rejected.
            let line = "2025-08-12 10:57:09.548 (EP[0]  sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }
    }
}