// dm_database_parser_sqllog/tools.rs
use once_cell::sync::Lazy;

7const TIMESTAMP_LENGTH: usize = 23;
9const MIN_LINE_LENGTH: usize = 25;
10const TIMESTAMP_SEPARATOR_POSITIONS: [(usize, u8); 6] = [
11 (4, b'-'),
12 (7, b'-'),
13 (10, b' '),
14 (13, b':'),
15 (16, b':'),
16 (19, b'.'),
17];
18const TIMESTAMP_DIGIT_POSITIONS: [usize; 17] =
19 [0, 1, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 22];
20
21const META_START_INDEX: usize = 25;
23#[allow(dead_code)]
24const MIN_META_FIELDS: usize = 6; #[allow(dead_code)]
26const REQUIRED_META_FIELDS: usize = 7;
27#[allow(dead_code)]
28const META_WITH_IP_FIELDS: usize = 8;
29
30static META_FIELD_PREFIXES: Lazy<[&'static str; 8]> = Lazy::new(|| {
32 [
33 "EP[",
34 "sess:",
35 "thrd:",
36 "user:",
37 "trxid:",
38 "stmt:",
39 "appname:",
40 "ip:::ffff:",
41 ]
42});
43
44const SPACE_BYTE: u8 = b' ';
46const OPEN_PAREN_BYTE: u8 = b'(';
47const CLOSE_PAREN_CHAR: char = ')';
48
49#[inline(always)]
73pub fn is_ts_millis_bytes(bytes: &[u8]) -> bool {
74 if bytes.len() != TIMESTAMP_LENGTH {
75 return false;
76 }
77
78 for &(pos, expected) in &TIMESTAMP_SEPARATOR_POSITIONS {
80 if bytes[pos] != expected {
81 return false;
82 }
83 }
84
85 for &i in &TIMESTAMP_DIGIT_POSITIONS {
87 if !bytes[i].is_ascii_digit() {
88 return false;
89 }
90 }
91
92 true
93}
94
95pub fn is_record_start_line(line: &str) -> bool {
129 let bytes = line.as_bytes();
131 if bytes.len() < MIN_LINE_LENGTH {
132 return false;
133 }
134
135 if !is_ts_millis_bytes(&bytes[0..TIMESTAMP_LENGTH]) {
137 return false;
138 }
139
140 if bytes[23] != SPACE_BYTE || bytes[24] != OPEN_PAREN_BYTE {
142 return false;
143 }
144
145 let closing_paren_index = match line.find(CLOSE_PAREN_CHAR) {
147 Some(idx) => idx,
148 None => return false,
149 };
150
151 let meta_part = &line[META_START_INDEX..closing_paren_index];
153
154 validate_meta_fields(meta_part)
156}
157
158#[inline]
166fn validate_meta_fields(meta: &str) -> bool {
167 let mut remaining = meta;
168
169 for &prefix in META_FIELD_PREFIXES.iter().take(5) {
171 if !remaining.starts_with(prefix) {
173 return false;
174 }
175
176 remaining = &remaining[prefix.len()..];
178
179 match remaining.find(' ') {
181 Some(space_idx) => {
182 remaining = &remaining[space_idx + 1..];
184 }
185 None => {
186 return prefix == "trxid:";
189 }
190 }
191 }
192
193 if remaining.is_empty() {
198 return true; }
200
201 if !remaining.starts_with("stmt:") {
202 return false; }
204
205 remaining = &remaining[5..]; match remaining.find(' ') {
209 Some(space_idx) => {
210 remaining = &remaining[space_idx + 1..];
211 }
212 None => {
213 return true; }
215 }
216
217 if remaining.is_empty() {
219 return true; }
221
222 if !remaining.starts_with("appname:") {
223 return false; }
225
226 remaining = &remaining[8..]; if let Some(_ip_idx) = remaining.find(" ip:::ffff:") {
231 return true;
233 }
234
235 true
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// Shape checks for `is_ts_millis_bytes`.
    mod timestamp_tests {
        use super::*;

        #[test]
        fn valid_timestamps() {
            let valid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56.789",
                b"2000-01-01 00:00:00.000",
                b"2099-12-31 23:59:59.999",
                // Leap day; only the layout is checked, not calendar validity.
                b"2024-02-29 12:34:56.789",
            ];
            for ts in valid_cases {
                assert!(is_ts_millis_bytes(ts), "Failed for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_length() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56",
                b"2024-06-15 12:34:56.7",
                b"2024-06-15 12:34:56.7890",
                b"",
                b"2024",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_separator() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56,789",
                b"2024/06/15 12:34:56.789",
                b"2024-06-15T12:34:56.789",
                b"2024-06-15-12:34:56.789",
                b"2024-06-15 12-34-56.789",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn non_digits() {
            let invalid_cases: &[&[u8]] = &[
                b"202a-06-15 12:34:56.789",
                b"2024-0b-15 12:34:56.789",
                b"2024-06-1c 12:34:56.789",
                b"2024-06-15 1d:34:56.789",
                b"2024-06-15 12:3e:56.789",
                b"2024-06-15 12:34:5f.789",
                b"2024-06-15 12:34:56.78g",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn special_chars() {
            assert!(!is_ts_millis_bytes(b"2024-06-15 12:34:56.\x00\x00\x00"));
            assert!(!is_ts_millis_bytes(b"\x002024-06-15 12:34:56.789"));
        }
    }

    /// End-to-end checks for `is_record_start_line`.
    mod record_start_line_tests {
        use super::*;

        #[test]
        fn valid_complete_line() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname: ip:::ffff:10.3.100.68) [SEL] select 1 from dual EXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 289655178.";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn valid_without_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname:) [SEL] select 1 from dual";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn minimal_valid() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn too_short() {
            let short_lines = [
                "2025-08-12 10:57:09.548",
                "2025-08-12 10:57:09.548 (",
                "",
                "short",
            ];
            for line in &short_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn invalid_timestamp() {
            let line = "2025-08-12 10:57:09,548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn format_errors() {
            let invalid_lines = [
                // No space between the timestamp and the opening parenthesis.
                "2025-08-12 10:57:09.548(EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // Opening parenthesis missing.
                "2025-08-12 10:57:09.548 EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // Closing parenthesis missing.
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app body",
            ];
            for line in &invalid_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn insufficient_fields() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn wrong_field_order() {
            let line = "2025-08-12 10:57:09.548 (sess:123 EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn missing_required_fields() {
            let test_cases = [
                (
                    "2025-08-12 10:57:09.548 (sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "EP",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "sess",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 user:alice trxid:789 stmt:999 appname:app) body",
                    "thrd",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 trxid:789 stmt:999 appname:app) body",
                    "user",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice stmt:999 appname:app) body",
                    "trxid",
                ),
            ];
            for (line, field) in &test_cases {
                assert!(
                    !is_record_start_line(line),
                    "Should fail when missing {} field",
                    field
                );
            }
        }

        #[test]
        fn with_valid_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:::ffff:192.168.1.100) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn with_invalid_ip_format() {
            // Accepted on purpose: nothing after the `appname:` prefix is
            // inspected, so a malformed IP field does not reject the line.
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:192.168.1.100) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn complex_field_values() {
            let line = "2025-08-12 10:57:09.548 (EP[123] sess:0xABCD1234 thrd:9999999 user:USER_WITH_UNDERSCORES trxid:12345678 stmt:0xFFFFFFFF appname:app-name-with-dashes ip:::ffff:10.20.30.40) SELECT * FROM table";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn empty_appname() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn continuation_line() {
            let continuation = " SELECT * FROM users WHERE id = 1";
            assert!(!is_record_start_line(continuation));
        }

        #[test]
        fn double_space_in_meta() {
            // Two spaces after `EP[0]`: the value ends at the first space, so
            // the next field starts with a space and fails its prefix check.
            let line = "2025-08-12 10:57:09.548 (EP[0]  sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));

            // Same line with single spaces throughout is accepted.
            let valid_line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(valid_line));
        }
    }
}