// dm_database_parser_sqllog/tools.rs

/// Byte length of a `YYYY-MM-DD HH:MM:SS.mmm` timestamp.
const TIMESTAMP_LENGTH: usize = 23;

/// Shortest line that can open a record: the timestamp followed by `" ("`.
const MIN_LINE_LENGTH: usize = TIMESTAMP_LENGTH + 2;

/// `(offset, byte)` pairs for the punctuation a timestamp must contain.
const TIMESTAMP_SEPARATOR_POSITIONS: [(usize, u8); 6] = [
    // Date part: `YYYY-MM-DD`, then the blank before the time.
    (4, b'-'),
    (7, b'-'),
    (10, b' '),
    // Time part: `HH:MM:SS.mmm`.
    (13, b':'),
    (16, b':'),
    (19, b'.'),
];

/// Offsets inside a timestamp that must hold an ASCII digit.
const TIMESTAMP_DIGIT_POSITIONS: [usize; 17] = [
    0, 1, 2, 3, // year
    5, 6, // month
    8, 9, // day
    11, 12, // hour
    14, 15, // minute
    17, 18, // second
    20, 21, 22, // millisecond
];

/// Byte expected directly after the timestamp.
const SPACE_BYTE: u8 = b' ';
/// Byte that opens the parenthesised metadata section.
const OPEN_PAREN_BYTE: u8 = b'(';
/// Character that closes the parenthesised metadata section.
const CLOSE_PAREN_CHAR: char = ')';

24#[inline(always)]
48pub fn is_ts_millis_bytes(bytes: &[u8]) -> bool {
49 if bytes.len() != TIMESTAMP_LENGTH {
50 return false;
51 }
52
53 for &(pos, expected) in &TIMESTAMP_SEPARATOR_POSITIONS {
55 if bytes[pos] != expected {
56 return false;
57 }
58 }
59
60 for &i in &TIMESTAMP_DIGIT_POSITIONS {
62 if !bytes[i].is_ascii_digit() {
63 return false;
64 }
65 }
66
67 true
68}
69
70pub fn is_record_start_line(line: &str) -> bool {
104 let bytes = line.as_bytes();
106 if bytes.len() < MIN_LINE_LENGTH {
107 return false;
108 }
109
110 if !is_ts_millis_bytes(&bytes[0..TIMESTAMP_LENGTH]) {
112 return false;
113 }
114
115 if bytes[23] != SPACE_BYTE || bytes[24] != OPEN_PAREN_BYTE {
117 return false;
118 }
119
120 let closing_paren_index = match line.find(CLOSE_PAREN_CHAR) {
122 Some(idx) => idx,
123 None => return false,
124 };
125
126 let meta_part = &line[25..closing_paren_index];
128 validate_meta_fields_fast(meta_part)
129}
130
/// Cheap structural check of a record's metadata section (the text between the
/// parentheses, without the parentheses themselves).
///
/// Returns `true` when `meta` is at least 38 bytes long and its first five
/// space-separated fields start with `EP[`, `sess:`, `thrd:`, `user:` and
/// `trxid:`, in that order. Exactly one space is consumed between fields, so a
/// doubled space makes the next prefix check fail. Field values and anything
/// after the `trxid:` prefix are not inspected.
///
/// NOTE(review): the reason for the 38-byte minimum is not visible in this file;
/// it rejects some inputs whose five prefixes are otherwise in order.
#[inline]
fn validate_meta_fields_fast(meta: &str) -> bool {
    const MIN_META_LENGTH: usize = 38;
    // Prefixes that must each be followed, somewhere later, by a space and more text.
    const LEADING_FIELDS: [&[u8]; 4] = [b"EP[", b"sess:", b"thrd:", b"user:"];
    // Last required prefix; nothing after it is examined.
    const FINAL_FIELD: &[u8] = b"trxid:";

    let mut rest = meta.as_bytes();
    if rest.len() < MIN_META_LENGTH {
        return false;
    }

    for &prefix in LEADING_FIELDS.iter() {
        if !rest.starts_with(prefix) {
            return false;
        }
        // Skip past this field's value: move to the byte after the next space.
        // A missing space, or a space that is the very last byte, means the
        // following field cannot exist.
        rest = match rest.iter().position(|&byte| byte == b' ') {
            Some(space) if space + 1 < rest.len() => &rest[space + 1..],
            _ => return false,
        };
    }

    rest.starts_with(FINAL_FIELD)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Shape checks for `is_ts_millis_bytes`.
    mod timestamp_tests {
        use super::*;

        #[test]
        fn valid_timestamps() {
            let valid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56.789",
                b"2000-01-01 00:00:00.000",
                b"2099-12-31 23:59:59.999",
                b"2024-02-29 12:34:56.789",
            ];
            for ts in valid_cases {
                assert!(is_ts_millis_bytes(ts), "Failed for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_length() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56",
                b"2024-06-15 12:34:56.7",
                b"2024-06-15 12:34:56.7890",
                b"",
                b"2024",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_separator() {
            let invalid_cases: &[&[u8]] = &[
                // ',' instead of '.' before the milliseconds
                b"2024-06-15 12:34:56,789",
                // '/' instead of '-' in the date
                b"2024/06/15 12:34:56.789",
                // 'T' instead of ' ' between date and time
                b"2024-06-15T12:34:56.789",
                // '-' instead of ' ' between date and time
                b"2024-06-15-12:34:56.789",
                // '-' instead of ':' in the time
                b"2024-06-15 12-34-56.789",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn non_digits() {
            // One letter planted in each numeric field in turn.
            let invalid_cases: &[&[u8]] = &[
                b"202a-06-15 12:34:56.789",
                b"2024-0b-15 12:34:56.789",
                b"2024-06-1c 12:34:56.789",
                b"2024-06-15 1d:34:56.789",
                b"2024-06-15 12:3e:56.789",
                b"2024-06-15 12:34:5f.789",
                b"2024-06-15 12:34:56.78g",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn special_chars() {
            // NUL bytes where digits belong.
            assert!(!is_ts_millis_bytes(b"2024-06-15 12:34:56.\x00\x00\x00"));
            // A leading NUL makes the slice one byte too long.
            assert!(!is_ts_millis_bytes(b"\x002024-06-15 12:34:56.789"));
        }
    }

    /// End-to-end checks for `is_record_start_line`.
    mod record_start_line_tests {
        use super::*;

        #[test]
        fn valid_complete_line() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname: ip:::ffff:10.3.100.68) [SEL] select 1 from dual EXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 289655178.";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn valid_without_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname:) [SEL] select 1 from dual";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn minimal_valid() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn too_short() {
            let short_lines = [
                "2025-08-12 10:57:09.548",
                "2025-08-12 10:57:09.548 (",
                "",
                "short",
            ];
            for line in &short_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn invalid_timestamp() {
            let line = "2025-08-12 10:57:09,548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn format_errors() {
            let invalid_lines = [
                // no space between the timestamp and '('
                "2025-08-12 10:57:09.548(EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // no opening parenthesis
                "2025-08-12 10:57:09.548 EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // no closing parenthesis
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app body",
            ];
            for line in &invalid_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn insufficient_fields() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn wrong_field_order() {
            let line = "2025-08-12 10:57:09.548 (sess:123 EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn missing_required_fields() {
            // Each case drops exactly one of the five checked fields.
            let test_cases = [
                (
                    "2025-08-12 10:57:09.548 (sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "EP",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "sess",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 user:alice trxid:789 stmt:999 appname:app) body",
                    "thrd",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 trxid:789 stmt:999 appname:app) body",
                    "user",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice stmt:999 appname:app) body",
                    "trxid",
                ),
            ];
            for (line, field) in &test_cases {
                assert!(
                    !is_record_start_line(line),
                    "Should fail when missing {} field",
                    field
                );
            }
        }

        #[test]
        fn with_valid_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:::ffff:192.168.1.100) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn with_invalid_ip_format() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:192.168.1.100) body";
            // Still accepted: the fast check stops at the `trxid:` prefix and never
            // looks at the `ip:` field.
            assert!(is_record_start_line(line));
        }

        #[test]
        fn complex_field_values() {
            let line = "2025-08-12 10:57:09.548 (EP[123] sess:0xABCD1234 thrd:9999999 user:USER_WITH_UNDERSCORES trxid:12345678 stmt:0xFFFFFFFF appname:app-name-with-dashes ip:::ffff:10.20.30.40) SELECT * FROM table";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn empty_appname() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn continuation_line() {
            let continuation = " SELECT * FROM users WHERE id = 1";
            assert!(!is_record_start_line(continuation));
        }

        #[test]
        fn double_space_in_meta() {
            // Two spaces between `EP[0]` and `sess:`: only one space is consumed
            // between fields, so the next field appears to start with a space and
            // the `sess:` prefix check fails.
            let line = "2025-08-12 10:57:09.548 (EP[0]  sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));

            // The same line with single spaces throughout is accepted.
            let valid_line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(valid_line));
        }
    }
}