dm_database_parser_sqllog/
tools.rs1use memchr::memchr;
2
/// Byte length of a `YYYY-MM-DD HH:MM:SS.mmm` timestamp.
const TIMESTAMP_LENGTH: usize = 23;
/// Shortest line that can begin a record: the timestamp plus the two bytes
/// `" ("` that must follow it.
const MIN_LINE_LENGTH: usize = TIMESTAMP_LENGTH + 2;
5
6#[inline(always)]
14pub fn is_ts_millis_bytes(bytes: &[u8]) -> bool {
15 bytes.len() == TIMESTAMP_LENGTH
16 && bytes[4] == b'-'
17 && bytes[7] == b'-'
18 && bytes[10] == b' '
19 && bytes[13] == b':'
20 && bytes[16] == b':'
21 && bytes[19] == b'.'
22 && bytes[0].is_ascii_digit()
23 && bytes[1].is_ascii_digit()
24 && bytes[2].is_ascii_digit()
25 && bytes[3].is_ascii_digit()
26 && bytes[5].is_ascii_digit()
27 && bytes[6].is_ascii_digit()
28 && bytes[8].is_ascii_digit()
29 && bytes[9].is_ascii_digit()
30 && bytes[11].is_ascii_digit()
31 && bytes[12].is_ascii_digit()
32 && bytes[14].is_ascii_digit()
33 && bytes[15].is_ascii_digit()
34 && bytes[17].is_ascii_digit()
35 && bytes[18].is_ascii_digit()
36 && bytes[20].is_ascii_digit()
37 && bytes[21].is_ascii_digit()
38 && bytes[22].is_ascii_digit()
39}
40
41pub fn is_record_start_line(line: &str) -> bool {
52 let bytes = line.as_bytes();
53 if bytes.len() < MIN_LINE_LENGTH {
54 return false;
55 }
56 if !is_ts_millis_bytes(&bytes[..TIMESTAMP_LENGTH]) {
57 return false;
58 }
59 if bytes[23] != b' ' || bytes[24] != b'(' {
60 return false;
61 }
62 let closing = match line.find(')') {
63 Some(idx) => idx,
64 None => return false,
65 };
66 validate_meta_fields_fast(&line[25..closing])
67}
68
69#[inline]
71fn validate_meta_fields_fast(meta: &str) -> bool {
72 let bytes = meta.as_bytes();
73 if bytes.len() < 38 {
75 return false;
76 }
77 let mut pos = 0;
78 for prefix in [b"EP[" as &[u8], b"sess:", b"thrd:", b"user:"] {
79 if !bytes[pos..].starts_with(prefix) {
80 return false;
81 }
82 pos += match memchr(b' ', &bytes[pos..]) {
83 Some(idx) => idx + 1,
84 None => return false,
85 };
86 }
87 bytes[pos..].starts_with(b"trxid:")
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Shape checks for `is_ts_millis_bytes`.
    mod timestamp_tests {
        use super::*;

        #[test]
        fn valid_timestamps() {
            let valid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56.789",
                b"2000-01-01 00:00:00.000",
                b"2099-12-31 23:59:59.999",
                b"2024-02-29 12:34:56.789",
            ];
            for ts in valid_cases {
                assert!(is_ts_millis_bytes(ts), "Failed for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_length() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56",
                b"2024-06-15 12:34:56.7",
                b"2024-06-15 12:34:56.7890",
                b"",
                b"2024",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn wrong_separator() {
            let invalid_cases: &[&[u8]] = &[
                b"2024-06-15 12:34:56,789",
                b"2024/06/15 12:34:56.789",
                b"2024-06-15T12:34:56.789",
                b"2024-06-15-12:34:56.789",
                b"2024-06-15 12-34-56.789",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn non_digits() {
            let invalid_cases: &[&[u8]] = &[
                b"202a-06-15 12:34:56.789",
                b"2024-0b-15 12:34:56.789",
                b"2024-06-1c 12:34:56.789",
                b"2024-06-15 1d:34:56.789",
                b"2024-06-15 12:3e:56.789",
                b"2024-06-15 12:34:5f.789",
                b"2024-06-15 12:34:56.78g",
            ];
            for ts in invalid_cases {
                assert!(!is_ts_millis_bytes(ts), "Should fail for: {:?}", ts);
            }
        }

        #[test]
        fn special_chars() {
            assert!(!is_ts_millis_bytes(b"2024-06-15 12:34:56.\x00\x00\x00"));
            assert!(!is_ts_millis_bytes(b"\x002024-06-15 12:34:56.789"));
        }
    }

    /// End-to-end checks for `is_record_start_line`.
    mod record_start_line_tests {
        use super::*;

        #[test]
        fn valid_complete_line() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname: ip:::ffff:10.3.100.68) [SEL] select 1 from dual EXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 289655178.";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn valid_without_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:0x178ebca0 thrd:757455 user:HBTCOMS_V3_PROD trxid:0 stmt:0x285eb060 appname:) [SEL] select 1 from dual";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn minimal_valid() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn too_short() {
            let short_lines = [
                "2025-08-12 10:57:09.548",
                "2025-08-12 10:57:09.548 (",
                "",
                "short",
            ];
            for line in &short_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn invalid_timestamp() {
            let line = "2025-08-12 10:57:09,548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn format_errors() {
            // Missing space before '(', missing '(', and missing ')'.
            let invalid_lines = [
                "2025-08-12 10:57:09.548(EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                "2025-08-12 10:57:09.548 EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app body",
            ];
            for line in &invalid_lines {
                assert!(!is_record_start_line(line), "Should fail for: {}", line);
            }
        }

        #[test]
        fn insufficient_fields() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn wrong_field_order() {
            let line = "2025-08-12 10:57:09.548 (sess:123 EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));
        }

        #[test]
        fn missing_required_fields() {
            let test_cases = [
                (
                    "2025-08-12 10:57:09.548 (sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "EP",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] thrd:456 user:alice trxid:789 stmt:999 appname:app) body",
                    "sess",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 user:alice trxid:789 stmt:999 appname:app) body",
                    "thrd",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 trxid:789 stmt:999 appname:app) body",
                    "user",
                ),
                (
                    "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice stmt:999 appname:app) body",
                    "trxid",
                ),
            ];
            for (line, field) in &test_cases {
                assert!(
                    !is_record_start_line(line),
                    "Should fail when missing {} field",
                    field
                );
            }
        }

        #[test]
        fn with_valid_ip() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:::ffff:192.168.1.100) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn with_invalid_ip_format() {
            // The ip field is not validated by the record-start check, so an
            // unexpected ip format is still accepted.
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app ip:192.168.1.100) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn complex_field_values() {
            let line = "2025-08-12 10:57:09.548 (EP[123] sess:0xABCD1234 thrd:9999999 user:USER_WITH_UNDERSCORES trxid:12345678 stmt:0xFFFFFFFF appname:app-name-with-dashes ip:::ffff:10.20.30.40) SELECT * FROM table";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn empty_appname() {
            let line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:) body";
            assert!(is_record_start_line(line));
        }

        #[test]
        fn continuation_line() {
            let continuation = " SELECT * FROM users WHERE id = 1";
            assert!(!is_record_start_line(continuation));
        }

        #[test]
        fn double_space_in_meta() {
            // Two spaces between `EP[0]` and `sess:`: the field after the
            // first space then starts with a space instead of `sess:`, so the
            // line must be rejected.
            let line = "2025-08-12 10:57:09.548 (EP[0]  sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(!is_record_start_line(line));

            // Same line with single spaces is a valid record start.
            let valid_line = "2025-08-12 10:57:09.548 (EP[0] sess:123 thrd:456 user:alice trxid:789 stmt:999 appname:app) body";
            assert!(is_record_start_line(valid_line));
        }
    }
}