dm_database_parser_sqllog/
/// Byte length of a `YYYY-MM-DD HH:MM:SS.mmm` timestamp.
const TIMESTAMP_LENGTH: usize = 23;
/// Shortest line worth inspecting: the timestamp plus the two bytes (` (`)
/// that must directly follow it.
const MIN_LINE_LENGTH: usize = TIMESTAMP_LENGTH + 2;

/// Separator byte expected right after the timestamp.
const SPACE_BYTE: u8 = b' ';
/// Byte that opens the parenthesised metadata group.
const OPEN_PAREN_BYTE: u8 = b'(';
/// Character that closes the metadata group; kept as a `char` because it is
/// used as a `str` search pattern.
const CLOSE_PAREN_CHAR: char = ')';
17
18#[inline(always)]
42pub fn is_ts_millis_bytes(bytes: &[u8]) -> bool {
43 if bytes.len() != TIMESTAMP_LENGTH {
44 return false;
45 }
46
47 if bytes[4] != b'-'
49 || bytes[7] != b'-'
50 || bytes[10] != b' '
51 || bytes[13] != b':'
52 || bytes[16] != b':'
53 || bytes[19] != b'.'
54 {
55 return false;
56 }
57
58 let is_digit = |b: u8| b.is_ascii_digit();
61
62 is_digit(bytes[0])
63 && is_digit(bytes[1])
64 && is_digit(bytes[2])
65 && is_digit(bytes[3])
66 && is_digit(bytes[5])
67 && is_digit(bytes[6])
68 && is_digit(bytes[8])
69 && is_digit(bytes[9])
70 && is_digit(bytes[11])
71 && is_digit(bytes[12])
72 && is_digit(bytes[14])
73 && is_digit(bytes[15])
74 && is_digit(bytes[17])
75 && is_digit(bytes[18])
76 && is_digit(bytes[20])
77 && is_digit(bytes[21])
78 && is_digit(bytes[22])
79}
80
81pub fn is_record_start_line(line: &str) -> bool {
115 let bytes = line.as_bytes();
117 if bytes.len() < MIN_LINE_LENGTH {
118 return false;
119 }
120
121 if !is_ts_millis_bytes(&bytes[0..TIMESTAMP_LENGTH]) {
123 return false;
124 }
125
126 if bytes[23] != SPACE_BYTE || bytes[24] != OPEN_PAREN_BYTE {
128 return false;
129 }
130
131 let closing_paren_index = match line.find(CLOSE_PAREN_CHAR) {
133 Some(idx) => idx,
134 None => return false,
135 };
136
137 let meta_part = &line[25..closing_paren_index];
139 validate_meta_fields_fast(meta_part)
140}
141
142pub fn is_probable_record_start_line(line: &str) -> bool {
147 let bytes = line.as_bytes();
148 if bytes.len() < MIN_LINE_LENGTH {
149 return false;
150 }
151 if !is_ts_millis_bytes(&bytes[0..TIMESTAMP_LENGTH]) {
152 return false;
153 }
154 if bytes[23] != SPACE_BYTE || bytes[24] != OPEN_PAREN_BYTE {
155 return false;
156 }
157 line.find(CLOSE_PAREN_CHAR).is_some()
159}
160
/// Checks that `meta` starts with the fields `EP[`, `sess:`, `thrd:`, `user:`
/// and `trxid:`, in that order, each separated from the next by exactly one
/// space.
///
/// Field values are not examined beyond locating the next space, and anything
/// after the `trxid:` prefix is ignored. Inputs shorter than 38 bytes are
/// rejected up front.
#[inline]
fn validate_meta_fields_fast(meta: &str) -> bool {
    const MIN_META_LEN: usize = 38;
    // Fields that must each be followed by a space and another field.
    const LEADING_PREFIXES: [&[u8]; 4] = [b"EP[", b"sess:", b"thrd:", b"user:"];
    const LAST_PREFIX: &[u8] = b"trxid:";

    let mut rest = meta.as_bytes();
    if rest.len() < MIN_META_LEN {
        return false;
    }

    for prefix in LEADING_PREFIXES.iter() {
        if !rest.starts_with(prefix) {
            return false;
        }
        // Jump to the byte right after the space that ends this field.
        rest = match rest.iter().position(|&byte| byte == b' ') {
            Some(space_at) => &rest[space_at + 1..],
            None => return false,
        };
        // A trailing space with nothing after it means the next field is missing.
        if rest.is_empty() {
            return false;
        }
    }

    rest.starts_with(LAST_PREFIX)
}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Tests for the fixed-layout timestamp check.
    mod timestamp_tests {
        use super::*;

        #[test]
        fn valid_timestamps() {
            let valid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56.789",
                b"2000-01-01 00:00:00.000",
                b"2099-12-31 23:59:59.999",
                b"2024-02-29 12:34:56.789",
            ];
            for ts in valid_cases {
                assert!(is_ts_millis_bytes(ts), "Failed for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_length() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56",
                b"2024-06-15 12:34:56.7",
                b"2024-06-15 12:34:56.7890",
                b"",
                b"2024",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_separator() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56,789",
                b"2024/06/15 12:34:56.789",
                b"2024-06-15T12:34:56.789",
                b"2024-06-15-12:34:56.789",
                b"2024-06-15 12-34-56.789",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn non_digits() {
            let invalid_cases: &[&[u8]] = &[
                b"202a-06-15 12:34:56.789",
                b"2024-0b-15 12:34:56.789",
                b"2024-06-1c 12:34:56.789",
                b"2024-06-15 1d:34:56.789",
                b"2024-06-15 12:3e:56.789",
                b"2024-06-15 12:34:5f.789",
                b"2024-06-15 12:34:56.78g",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn special_chars() {
            assert!(!is_ts_millis_bytes(b"2024-06-15 12:34:56.\x00\x00\x00"));
            assert!(!is_ts_millis_bytes(b"\x002024-06-15 12:34:56.789"));
        }
    }

    /// Tests for the strict record-start check.
    mod record_start_line_tests {
        use super::*;

        #[test]
        fn valid_complete_line() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname: ip:::ffff:10.3.100.68) [SEL] select 1 from dual EXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 289655178.";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn valid_without_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname:) [SEL] select 1 from dual";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn minimal_valid() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn too_short() {
            let short_lines = [
                "2025-08-12 10:57:09.548",
                "2025-08-12 10:57:09.548 (",
                "",
                "short",
            ];
            for line in &short_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn invalid_timestamp() {
            let line = "2025-08-12 10:57:09,548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn format_errors() {
            let invalid_lines = [
                // No space between the timestamp and the parenthesis.
                "2025-08-12 10:57:09.548(EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // Missing opening parenthesis.
                "2025-08-12 10:57:09.548 EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // Missing closing parenthesis.
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app body",
            ];
            for line in &invalid_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn insufficient_fields() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn wrong_field_order() {
            let line = "2025-08-12 10:57:09.548 (sess:123 EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn missing_required_fields() {
            let test_cases = [
                (
                    "2025-08-12 10:57:09.548 (sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "EP",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "sess",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 user:alice trxid:789 stmt:999 appname:app) body",
                    "thrd",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 trxid:789 stmt:999 appname:app) body",
                    "user",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice stmt:999 appname:app) body",
                    "trxid",
                ),
            ];
            for (line, field) in &test_cases {
                assert!(
                    !is_record_start_line(line),
                    "Should fail when missing {} field",
                    field
                );
            }
        }

        #[test]
        fn with_valid_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:::ffff:192.168.1.100) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn with_invalid_ip_format() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:192.168.1.100) body";
            // Still accepted: the fast validator stops at the `trxid:` prefix
            // and never looks at the ip field.
            assert!(is_record_start_line(line));
        }

        #[test]
        fn complex_field_values() {
            let line = "2025-08-12 10:57:09.548 (EP[123] sess:0xABCD1234 thrd:9999999 user:USER_WITH_UNDERSCORES trxid:12345678 stmt:0xFFFFFFFF appname:app-name-with-dashes ip:::ffff:10.20.30.40) SELECT * FROM table";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn empty_appname() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn continuation_line() {
            let continuation = " SELECT * FROM users WHERE id = 1";
            assert!(!is_record_start_line(continuation));
        }

        #[test]
        fn double_space_in_meta() {
            // Two spaces between `EP[0]` and `sess:`: the validator expects the
            // next field to begin right after a single space, so this is rejected.
            let line = "2025-08-12 10:57:09.548 (EP[0]  sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));

            // The same line with single spaces is accepted.
            let valid_line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(valid_line));
        }
    }
}