dm_database_parser_sqllog/
/// Exact byte length of a millisecond timestamp (`YYYY-MM-DD HH:MM:SS.mmm`).
const TIMESTAMP_LENGTH: usize = 23;

/// Shortest line that can be a record start: the timestamp followed by the
/// two bytes `" ("`.
const MIN_LINE_LENGTH: usize = TIMESTAMP_LENGTH + 2;

/// `(offset, byte)` pairs for the fixed punctuation inside a timestamp.
const TIMESTAMP_SEPARATOR_POSITIONS: [(usize, u8); 6] = [
    (4, b'-'),  // between year and month
    (7, b'-'),  // between month and day
    (10, b' '), // between date and time
    (13, b':'), // between hour and minute
    (16, b':'), // between minute and second
    (19, b'.'), // between second and milliseconds
];

/// Offsets inside a timestamp that must hold an ASCII digit; together with the
/// separator offsets these cover all 23 bytes.
const TIMESTAMP_DIGIT_POSITIONS: [usize; 17] = [
    0, 1, 2, 3, // year
    5, 6, // month
    8, 9, // day
    11, 12, // hour
    14, 15, // minute
    17, 18, // second
    20, 21, 22, // milliseconds
];

/// Byte expected right after the timestamp.
const SPACE_BYTE: u8 = b' ';
/// Byte that opens the metadata section.
const OPEN_PAREN_BYTE: u8 = b'(';
/// Character that closes the metadata section.
const CLOSE_PAREN_CHAR: char = ')';
27
28#[inline(always)]
52pub fn is_ts_millis_bytes(bytes: &[u8]) -> bool {
53 if bytes.len() != TIMESTAMP_LENGTH {
54 return false;
55 }
56
57 for &(pos, expected) in &TIMESTAMP_SEPARATOR_POSITIONS {
59 if bytes[pos] != expected {
60 return false;
61 }
62 }
63
64 for &i in &TIMESTAMP_DIGIT_POSITIONS {
66 if !bytes[i].is_ascii_digit() {
67 return false;
68 }
69 }
70
71 true
72}
73
74pub fn is_record_start_line(line: &str) -> bool {
108 let bytes = line.as_bytes();
110 if bytes.len() < MIN_LINE_LENGTH {
111 return false;
112 }
113
114 if !is_ts_millis_bytes(&bytes[0..TIMESTAMP_LENGTH]) {
116 return false;
117 }
118
119 if bytes[23] != SPACE_BYTE || bytes[24] != OPEN_PAREN_BYTE {
121 return false;
122 }
123
124 let closing_paren_index = match line.find(CLOSE_PAREN_CHAR) {
126 Some(idx) => idx,
127 None => return false,
128 };
129
130 let meta_part = &line[25..closing_paren_index];
132 validate_meta_fields_fast(meta_part)
133}
134
/// Cheap structural check of the metadata text found between the parentheses.
///
/// Returns `true` when `meta` is at least 38 bytes long and its first five
/// space-delimited tokens begin with `EP[`, `sess:`, `thrd:`, `user:` and
/// `trxid:`, in that order. Exactly one space is skipped between tokens, so a
/// doubled space makes the next prefix check fail. Token values and anything
/// after the `trxid:` prefix are not inspected.
#[inline]
fn validate_meta_fields_fast(meta: &str) -> bool {
    /// Metadata shorter than this is rejected without looking at the fields.
    const MIN_META_LENGTH: usize = 38;
    /// Field tags that must each be followed by a space and another field.
    const LEADING_TAGS: [&[u8]; 4] = [b"EP[", b"sess:", b"thrd:", b"user:"];

    let mut remaining = meta.as_bytes();
    if remaining.len() < MIN_META_LENGTH {
        return false;
    }

    for &tag in &LEADING_TAGS {
        if !remaining.starts_with(tag) {
            return false;
        }
        // Jump past the space that terminates the current field.
        remaining = match remaining.iter().position(|&byte| byte == b' ') {
            Some(space_at) => &remaining[space_at + 1..],
            None => return false,
        };
        // A trailing space with nothing after it means the next field is missing.
        if remaining.is_empty() {
            return false;
        }
    }

    remaining.starts_with(b"trxid:")
}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Byte-level checks of `is_ts_millis_bytes`.
    mod timestamp_tests {
        use super::*;

        #[test]
        fn valid_timestamps() {
            let valid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56.789",
                b"2000-01-01 00:00:00.000",
                b"2099-12-31 23:59:59.999",
                b"2024-02-29 12:34:56.789",
            ];
            for ts in valid_cases {
                assert!(is_ts_millis_bytes(ts), "Failed for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_length() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56",
                b"2024-06-15 12:34:56.7",
                b"2024-06-15 12:34:56.7890",
                b"",
                b"2024",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_separator() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56,789", // comma before the milliseconds
                b"2024/06/15 12:34:56.789", // slashes in the date
                b"2024-06-15T12:34:56.789", // `T` between date and time
                b"2024-06-15-12:34:56.789", // dash between date and time
                b"2024-06-15 12-34-56.789", // dashes in the time
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn non_digits() {
            let invalid_cases: &[&[u8]] = &[
                b"202a-06-15 12:34:56.789",
                b"2024-0b-15 12:34:56.789",
                b"2024-06-1c 12:34:56.789",
                b"2024-06-15 1d:34:56.789",
                b"2024-06-15 12:3e:56.789",
                b"2024-06-15 12:34:5f.789",
                b"2024-06-15 12:34:56.78g",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn special_chars() {
            assert!(!is_ts_millis_bytes(b"2024-06-15 12:34:56.\x00\x00\x00"));
            assert!(!is_ts_millis_bytes(b"\x002024-06-15 12:34:56.789"));
        }
    }

    /// Whole-line checks of `is_record_start_line`.
    mod record_start_line_tests {
        use super::*;

        #[test]
        fn valid_complete_line() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname: ip:::ffff:10.3.100.68) [SEL] select 1 from dual EXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 289655178.";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn valid_without_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname:) [SEL] select 1 from dual";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn minimal_valid() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn too_short() {
            let short_lines = [
                "2025-08-12 10:57:09.548",
                "2025-08-12 10:57:09.548 (",
                "",
                "short",
            ];
            for line in &short_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn invalid_timestamp() {
            let line = "2025-08-12 10:57:09,548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn format_errors() {
            let invalid_lines = [
                // No space between the timestamp and `(`.
                "2025-08-12 10:57:09.548(EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // No opening parenthesis.
                "2025-08-12 10:57:09.548 EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // No closing parenthesis.
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app body",
            ];
            for line in &invalid_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn insufficient_fields() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn wrong_field_order() {
            let line = "2025-08-12 10:57:09.548 (sess:123 EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn missing_required_fields() {
            // Each case drops exactly one of the five fields the validator looks for.
            let test_cases = [
                (
                    "2025-08-12 10:57:09.548 (sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "EP",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "sess",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 user:alice trxid:789 stmt:999 appname:app) body",
                    "thrd",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 trxid:789 stmt:999 appname:app) body",
                    "user",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice stmt:999 appname:app) body",
                    "trxid",
                ),
            ];
            for (line, field) in &test_cases {
                assert!(
                    !is_record_start_line(line),
                    "Should fail when missing {} field",
                    field
                );
            }
        }

        #[test]
        fn with_valid_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:::ffff:192.168.1.100) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn with_invalid_ip_format() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:192.168.1.100) body";
            // The fast validator stops at the `trxid:` prefix, so the shape of
            // the `ip:` value has no influence on the result.
            assert!(is_record_start_line(line));
        }

        #[test]
        fn complex_field_values() {
            let line = "2025-08-12 10:57:09.548 (EP[123] sess:0xABCD1234 thrd:9999999 user:USER_WITH_UNDERSCORES trxid:12345678 stmt:0xFFFFFFFF appname:app-name-with-dashes ip:::ffff:10.20.30.40) SELECT * FROM table";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn empty_appname() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn continuation_line() {
            let continuation = " SELECT * FROM users WHERE id = 1";
            assert!(!is_record_start_line(continuation));
        }

        #[test]
        fn double_space_in_meta() {
            // Two spaces after `EP[0]`: the validator skips exactly one space
            // per field, so the next token starts with ' ' instead of `sess:`.
            let line = "2025-08-12 10:57:09.548 (EP[0]  sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));

            // The same line with single spaces is accepted.
            let valid_line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(valid_line));
        }
    }
}