Skip to main content

dm_database_parser_sqllog/
tools.rs

1use memchr::memchr;
2
/// Byte length of a `YYYY-MM-DD HH:MM:SS.mmm` timestamp.
const TIMESTAMP_LENGTH: usize = "YYYY-MM-DD HH:MM:SS.mmm".len();
/// Shortest line worth inspecting: a timestamp followed by the two bytes ` (`.
const MIN_LINE_LENGTH: usize = TIMESTAMP_LENGTH + " (".len();
5
6/// 判断字节数组是否为有效的时间戳格式 "YYYY-MM-DD HH:MM:SS.mmm"(恰好 23 字节)
7///
8/// ```
9/// use dm_database_parser_sqllog::tools::is_ts_millis_bytes;
10/// assert!(is_ts_millis_bytes(b"2025-08-12 10:57:09.548"));
11/// assert!(!is_ts_millis_bytes(b"2025-08-12"));
12/// ```
13#[inline(always)]
14pub fn is_ts_millis_bytes(bytes: &[u8]) -> bool {
15    bytes.len() == TIMESTAMP_LENGTH
16        && bytes[4] == b'-'
17        && bytes[7] == b'-'
18        && bytes[10] == b' '
19        && bytes[13] == b':'
20        && bytes[16] == b':'
21        && bytes[19] == b'.'
22        && bytes[0].is_ascii_digit()
23        && bytes[1].is_ascii_digit()
24        && bytes[2].is_ascii_digit()
25        && bytes[3].is_ascii_digit()
26        && bytes[5].is_ascii_digit()
27        && bytes[6].is_ascii_digit()
28        && bytes[8].is_ascii_digit()
29        && bytes[9].is_ascii_digit()
30        && bytes[11].is_ascii_digit()
31        && bytes[12].is_ascii_digit()
32        && bytes[14].is_ascii_digit()
33        && bytes[15].is_ascii_digit()
34        && bytes[17].is_ascii_digit()
35        && bytes[18].is_ascii_digit()
36        && bytes[20].is_ascii_digit()
37        && bytes[21].is_ascii_digit()
38        && bytes[22].is_ascii_digit()
39}
40
41/// 判断一行日志是否为记录起始行
42///
43/// 验证:时间戳格式 + ` (` 前缀 + meta 包含 EP、sess、thrd、user、trxid(按序)
44///
45/// ```
46/// use dm_database_parser_sqllog::tools::is_record_start_line;
47/// let valid = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) SELECT 1";
48/// assert!(is_record_start_line(valid));
49/// assert!(!is_record_start_line("This is not a log line"));
50/// ```
51pub fn is_record_start_line(line: &str) -> bool {
52    let bytes = line.as_bytes();
53    if bytes.len() < MIN_LINE_LENGTH {
54        return false;
55    }
56    if !is_ts_millis_bytes(&bytes[..TIMESTAMP_LENGTH]) {
57        return false;
58    }
59    if bytes[23] != b' ' || bytes[24] != b'(' {
60        return false;
61    }
62    let closing = match line.find(')') {
63        Some(idx) => idx,
64        None => return false,
65    };
66    validate_meta_fields_fast(&line[25..closing])
67}
68
69/// 验证 meta 字段顺序与前缀(EP → sess → thrd → user → trxid)
70#[inline]
71fn validate_meta_fields_fast(meta: &str) -> bool {
72    let bytes = meta.as_bytes();
73    // 最小合法 meta: "EP[0] sess:1 thrd:1 user:a trxid:1"
74    if bytes.len() < 38 {
75        return false;
76    }
77    let mut pos = 0;
78    for prefix in [b"EP[" as &[u8], b"sess:", b"thrd:", b"user:"] {
79        if !bytes[pos..].starts_with(prefix) {
80            return false;
81        }
82        pos += match memchr(b' ', &bytes[pos..]) {
83            Some(idx) => idx + 1,
84            None => return false,
85        };
86    }
87    bytes[pos..].starts_with(b"trxid:")
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Layout checks for `is_ts_millis_bytes`.
    mod timestamp_tests {
        use super::*;

        /// Asserts that none of `samples` is accepted as a timestamp.
        fn assert_all_rejected(samples: &[&[u8]]) {
            for ts in samples {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn valid_timestamps() {
            let accepted: [&[u8]; 4] = [
                b"2024-06-15 12:34:56.789",
                b"2000-01-01 00:00:00.000",
                b"2099-12-31 23:59:59.999",
                b"2024-02-29 12:34:56.789", // leap day
            ];
            for ts in &accepted {
                assert!(is_ts_millis_bytes(ts), "Failed for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_length() {
            assert_all_rejected(&[
                b"2024-06-15 12:34:56",
                b"2024-06-15 12:34:56.7",
                b"2024-06-15 12:34:56.7890",
                b"",
                b"2024",
            ]);
        }

        #[test]
        fn wrong_separator() {
            assert_all_rejected(&[
                b"2024-06-15 12:34:56,789", // comma instead of dot
                b"2024/06/15 12:34:56.789", // slashes instead of hyphens
                b"2024-06-15T12:34:56.789", // 'T' instead of space
                b"2024-06-15-12:34:56.789", // hyphen instead of space
                b"2024-06-15 12-34-56.789", // hyphens instead of colons
            ]);
        }

        #[test]
        fn non_digits() {
            // One letter planted in each numeric field in turn.
            assert_all_rejected(&[
                b"202a-06-15 12:34:56.789",
                b"2024-0b-15 12:34:56.789",
                b"2024-06-1c 12:34:56.789",
                b"2024-06-15 1d:34:56.789",
                b"2024-06-15 12:3e:56.789",
                b"2024-06-15 12:34:5f.789",
                b"2024-06-15 12:34:56.78g",
            ]);
        }

        #[test]
        fn special_chars() {
            // NUL bytes in the millisecond field.
            assert!(!is_ts_millis_bytes(b"2024-06-15 12:34:56.\x00\x00\x00"));
            // A leading NUL byte in front of an otherwise valid timestamp.
            assert!(!is_ts_millis_bytes(b"\x002024-06-15 12:34:56.789"));
        }
    }

    /// Acceptance and rejection cases for `is_record_start_line`.
    mod record_start_line_tests {
        use super::*;

        /// Well-formed header with short field values, shared by several tests.
        const WELL_FORMED: &str = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";

        /// Asserts that every entry of `lines` is rejected.
        fn assert_all_rejected(lines: &[&str]) {
            for line in lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn valid_complete_line() {
            assert!(is_record_start_line(
                "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname: ip:::ffff:10.3.100.68) [SEL] select 1 from dual EXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 289655178."
            ));
        }

        #[test]
        fn valid_without_ip() {
            assert!(is_record_start_line(
                "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname:) [SEL] select 1 from dual"
            ));
        }

        #[test]
        fn minimal_valid() {
            assert!(is_record_start_line(WELL_FORMED));
        }

        #[test]
        fn too_short() {
            assert_all_rejected(&[
                "2025-08-12 10:57:09.548",
                "2025-08-12 10:57:09.548 (",
                "",
                "short",
            ]);
        }

        #[test]
        fn invalid_timestamp() {
            // Comma instead of dot before the milliseconds.
            assert!(!is_record_start_line(
                "2025-08-12 10:57:09,548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body"
            ));
        }

        #[test]
        fn format_errors() {
            assert_all_rejected(&[
                // no space between the timestamp and '('
                "2025-08-12 10:57:09.548(EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // no '(' at all
                "2025-08-12 10:57:09.548 EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                // no closing ')'
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app body",
            ]);
        }

        #[test]
        fn insufficient_fields() {
            assert!(!is_record_start_line(
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice) body"
            ));
        }

        #[test]
        fn wrong_field_order() {
            assert!(!is_record_start_line(
                "2025-08-12 10:57:09.548 (sess:123 EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body"
            ));
        }

        #[test]
        fn missing_required_fields() {
            // Each header omits exactly one mandatory field, named alongside it.
            let headers_with_gap = [
                (
                    "2025-08-12 10:57:09.548 (sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "EP",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "sess",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 user:alice trxid:789 stmt:999 appname:app) body",
                    "thrd",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 trxid:789 stmt:999 appname:app) body",
                    "user",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice stmt:999 appname:app) body",
                    "trxid",
                ),
            ];
            for (line, field) in headers_with_gap {
                assert!(
                    !is_record_start_line(line),
                    "Should fail when missing {} field",
                    field
                );
            }
        }

        #[test]
        fn with_valid_ip() {
            assert!(is_record_start_line(
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:::ffff:192.168.1.100) body"
            ));
        }

        #[test]
        fn with_invalid_ip_format() {
            // The ip field is not examined, so this header is still accepted.
            assert!(is_record_start_line(
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:192.168.1.100) body"
            ));
        }

        #[test]
        fn complex_field_values() {
            assert!(is_record_start_line(
                "2025-08-12 10:57:09.548 (EP[123] sess:0xABCD1234 thrd:9999999 user:USER_WITH_UNDERSCORES trxid:12345678 stmt:0xFFFFFFFF appname:app-name-with-dashes ip:::ffff:10.20.30.40) SELECT * FROM table"
            ));
        }

        #[test]
        fn empty_appname() {
            assert!(is_record_start_line(
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:) body"
            ));
        }

        #[test]
        fn continuation_line() {
            assert!(!is_record_start_line("    SELECT * FROM users WHERE id = 1"));
        }

        #[test]
        fn double_space_in_meta() {
            // Two spaces after the EP field break the fixed field layout.
            assert!(!is_record_start_line(
                "2025-08-12 10:57:09.548 (EP[0]  sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body"
            ));

            // The same header with single spaces is accepted.
            assert!(is_record_start_line(WELL_FORMED));
        }
    }
}