1pub mod builder;
2pub(crate) mod encoding;
3pub mod iterator;
4
5pub use builder::LogParserBuilder;
6pub use encoding::FileEncodingHint;
7pub use iterator::LogIterator;
8
9use memchr::memmem::Finder;
10use memchr::{memchr, memrchr};
11use std::str;
12use std::sync::LazyLock;
13
14use crate::error::ParseError;
15use crate::record::{self, Sqllog};
16use ::encoding::all::GB18030;
17use ::encoding::{DecoderTrap, Encoding};
18
/// Lazily constructed substring searcher for the two-byte sequence `") "`.
///
/// `parse_record_with_hint` uses it to locate the end of the parenthesised
/// meta section on a record's first line. The `Finder` is built once, on
/// first use, and then shared by every subsequent call.
static FINDER_CLOSE_META: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b") "));
21
/// Owns the raw bytes to be parsed together with the encoding hint that is
/// applied when text fields are decoded.
///
/// Records are obtained by iterating via [`LogParser::iter`].
pub struct LogParser {
    /// Raw bytes the iterator walks over.
    pub(super) data: Vec<u8>,
    /// Encoding hint copied into every iterator created by `iter`.
    pub(super) encoding: FileEncodingHint,
}
30
31impl LogParser {
32 pub fn iter(&self) -> LogIterator<'_> {
34 LogIterator {
35 data: &self.data,
36 pos: 0,
37 encoding: self.encoding,
38 line_number: 1,
39 }
40 }
41}
42
/// Test-only convenience wrapper around `parse_record_with_hint`.
///
/// Parses `record_bytes` with `FileEncodingHint::Auto` and passes `0` as the
/// line number, so any resulting error reports line 0.
#[cfg(test)]
pub(crate) fn parse_record(record_bytes: &[u8]) -> Result<Sqllog, ParseError> {
    parse_record_with_hint(record_bytes, FileEncodingHint::Auto, 0)
}
50
51pub(super) fn parse_record_with_hint(
53 record_bytes: &[u8],
54 encoding_hint: FileEncodingHint,
55 line_number: u64,
56) -> Result<Sqllog, ParseError> {
57 let is_multiline = memchr(b'\n', record_bytes).is_some();
59
60 let first_line = if is_multiline {
62 match memchr(b'\n', record_bytes) {
63 Some(idx) => {
64 let mut line = &record_bytes[..idx];
65 if line.ends_with(b"\r") {
66 line = &line[..line.len() - 1];
67 }
68 line
69 }
70 None => {
71 let mut line = record_bytes;
72 if line.ends_with(b"\r") {
73 line = &line[..line.len() - 1];
74 }
75 line
76 }
77 }
78 } else {
79 let mut line = record_bytes;
80 if line.ends_with(b"\r") {
81 line = &line[..line.len() - 1];
82 }
83 line
84 };
85
86 if first_line.len() < 23 {
88 return Err(make_invalid_format_error(first_line, line_number));
89 }
90 let ts = match str::from_utf8(&first_line[0..23]) {
91 Ok(s) => s.to_string(),
92 Err(_) => return Err(make_invalid_format_error(first_line, line_number)),
93 };
94
95 let meta_start = match memchr(b'(', &first_line[23..]) {
97 Some(idx) => 23 + idx,
98 None => return Err(make_invalid_format_error(first_line, line_number)),
99 };
100
101 let meta_end = match FINDER_CLOSE_META.find(&first_line[meta_start..]) {
102 Some(idx) => Some(meta_start + idx),
103 None => memrchr(b')', &first_line[meta_start..]).map(|idx| meta_start + idx),
104 };
105
106 let meta_end = match meta_end {
107 Some(idx) => idx,
108 None => return Err(make_invalid_format_error(first_line, line_number)),
109 };
110
111 let meta_bytes = &first_line[meta_start + 1..meta_end];
112
113 let (ep, sess_id, thrd_id, username, trxid, statement, appname, client_ip) = match encoding_hint
115 {
116 FileEncodingHint::Utf8 => record::parse_meta_from_bytes(meta_bytes),
117 FileEncodingHint::Auto => {
118 match str::from_utf8(meta_bytes) {
120 Ok(_) => record::parse_meta_from_bytes(meta_bytes),
121 Err(_) => match GB18030.decode(meta_bytes, DecoderTrap::Strict) {
122 Ok(decoded) => record::parse_meta_from_bytes(decoded.as_bytes()),
123 Err(_) => {
124 let lossy = String::from_utf8_lossy(meta_bytes).into_owned();
125 record::parse_meta_from_bytes(lossy.as_bytes())
126 }
127 },
128 }
129 }
130 FileEncodingHint::Gb18030 => match GB18030.decode(meta_bytes, DecoderTrap::Strict) {
131 Ok(decoded) => record::parse_meta_from_bytes(decoded.as_bytes()),
132 Err(_) => {
133 let lossy = String::from_utf8_lossy(meta_bytes).into_owned();
134 record::parse_meta_from_bytes(lossy.as_bytes())
135 }
136 },
137 };
138
139 let body_start_in_first_line = meta_end + 1;
141
142 let content_start = if body_start_in_first_line < first_line.len()
143 && first_line[body_start_in_first_line] == b' '
144 {
145 body_start_in_first_line + 1
146 } else {
147 body_start_in_first_line
148 };
149
150 let mut tag: Option<String> = None;
152 let content_slice = if content_start < record_bytes.len() {
153 let mut s = &record_bytes[content_start..];
154 if !s.is_empty()
155 && s[0] == b'['
156 && let Some(end_idx) = memchr(b']', s)
157 && end_idx >= 1
158 {
159 let inner = &s[1..end_idx];
160 if !inner.contains(&b' ') && inner.len() <= 32 {
161 tag = match encoding_hint {
162 FileEncodingHint::Utf8 => str::from_utf8(inner).ok().map(|t| t.to_string()),
163 FileEncodingHint::Auto => str::from_utf8(inner)
164 .ok()
165 .map(|t| t.to_string())
166 .or_else(|| GB18030.decode(inner, DecoderTrap::Strict).ok()),
167 FileEncodingHint::Gb18030 => GB18030
168 .decode(inner, DecoderTrap::Strict)
169 .ok()
170 .or_else(|| str::from_utf8(inner).ok().map(|s| s.to_string())),
171 };
172 s = &s[end_idx + 1..];
174 let mut skip = 0usize;
175 while skip < s.len() && s[skip].is_ascii_whitespace() {
176 skip += 1;
177 }
178 s = &s[skip..];
179 }
180 }
181 s
182 } else {
183 &[] as &[u8]
184 };
185
186 let split = record::find_indicators_split(content_slice);
188 let body_bytes = &content_slice[..split];
189 let ind_bytes = &content_slice[split..];
190
191 let sql_raw = match encoding_hint {
193 FileEncodingHint::Utf8 => String::from_utf8_lossy(body_bytes).into_owned(),
194 FileEncodingHint::Auto => match str::from_utf8(body_bytes) {
195 Ok(s) => s.to_string(),
196 Err(_) => match GB18030.decode(body_bytes, DecoderTrap::Strict) {
197 Ok(s) => s,
198 Err(_) => String::from_utf8_lossy(body_bytes).into_owned(),
199 },
200 },
201 FileEncodingHint::Gb18030 => match GB18030.decode(body_bytes, DecoderTrap::Strict) {
202 Ok(s) => s,
203 Err(_) => String::from_utf8_lossy(body_bytes).into_owned(),
204 },
205 };
206
207 let sql = if tag.as_deref() == Some("ORA") {
209 sql_raw.strip_prefix(": ").unwrap_or(&sql_raw).to_string()
210 } else {
211 sql_raw
212 };
213
214 let (exectime, rowcount, exec_id) = record::parse_indicators_from_bytes(ind_bytes);
216
217 Ok(Sqllog {
218 ts,
219 tag,
220 ep,
221 sess_id,
222 thrd_id,
223 username,
224 trxid,
225 statement,
226 appname,
227 client_ip,
228 sql,
229 exectime,
230 rowcount,
231 exec_id,
232 })
233}
234
235#[cold]
236fn make_invalid_format_error(raw_bytes: &[u8], line_number: u64) -> ParseError {
237 ParseError::InvalidFormat {
238 raw: String::from_utf8_lossy(raw_bytes).to_string(),
239 line_number,
240 }
241}
242
#[cfg(test)]
mod tests {
    // Unit tests for record parsing (`parse_record`) and for the builder /
    // iterator path that reads records from a file on disk.
    use super::*;

    // Timestamp + meta prefix (including the separating space) shared by the
    // synthetic records built with `build_record`.
    const HEADER: &[u8] =
        b"2025-11-17 16:09:41.123 (EP[1] sess:123 thrd:456 user:alice trxid:789 stmt:0x1 appname:bench) ";

    // Concatenates the shared header, a body and an (optionally empty) tail.
    fn build_record(body: &str, tail: &str) -> Vec<u8> {
        [HEADER, body.as_bytes(), tail.as_bytes()].concat()
    }

    // Encodes `text` as GB18030, panicking if the text cannot be represented.
    fn encode_gb18030(text: &str) -> Vec<u8> {
        use ::encoding::all::GB18030;
        use ::encoding::{EncoderTrap, Encoding};

        GB18030.encode(text, EncoderTrap::Strict).expect("encode")
    }

    // Writes `contents` to a fresh temporary file and syncs it to disk.
    #[cfg(not(miri))]
    fn temp_log_with(contents: &[u8]) -> tempfile::NamedTempFile {
        use std::io::Write;

        let mut tmp = tempfile::NamedTempFile::new().expect("tmp");
        tmp.write_all(contents).expect("write");
        tmp.as_file().sync_all().expect("sync");
        tmp
    }

    // ---- builder -----------------------------------------------------------

    #[cfg(not(miri))]
    #[test]
    fn test_builder_encoding_hint_utf8() {
        let tmp = temp_log_with(
            b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:a) SELECT 1",
        );

        let parser = LogParserBuilder::new(tmp.path())
            .encoding_hint(FileEncodingHint::Utf8)
            .build()
            .expect("build");
        let record = parser.iter().next().unwrap().unwrap();
        assert_eq!(record.ts, "2025-11-17 16:09:41.123");
        assert!(record.sql.contains("SELECT 1"));
    }

    #[cfg(not(miri))]
    #[test]
    fn test_builder_file_not_found() {
        let result = LogParserBuilder::new("/nonexistent/path.log").build();
        assert!(
            matches!(result, Err(ParseError::IoError(_))),
            "Expected IoError on nonexistent file"
        );
    }

    // ---- indicators (EXECTIME / ROWCOUNT / EXEC_ID) ---------------------------

    #[test]
    fn performance_metrics_full() {
        let raw = build_record(
            "SELECT * FROM T ",
            "EXECTIME: 10.5(ms) ROWCOUNT: 100(rows) EXEC_ID: 999.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 10.5).abs() < 1e-6);
        assert_eq!(rec.rowcount, 100);
        assert_eq!(rec.exec_id, 999);
        assert_eq!(rec.sql, "SELECT * FROM T ");
    }

    #[test]
    fn performance_metrics_no_indicators() {
        let raw = build_record("SELECT 1;", "");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.sql, "SELECT 1;");
    }

    #[test]
    fn performance_metrics_ora_tag_strips_colon_space_prefix() {
        let raw = build_record(
            "[ORA] : SELECT 1 FROM DUAL ",
            "EXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 42.",
        );
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("ORA"));
        assert_eq!(rec.sql, "SELECT 1 FROM DUAL ");
        assert!((rec.exectime - 5.0).abs() < 1e-6);
        assert_eq!(rec.rowcount, 1);
        assert_eq!(rec.exec_id, 42);
    }

    #[test]
    fn performance_metrics_ora_tag_no_prefix_unchanged() {
        let raw = build_record(
            "[ORA] SELECT 1 FROM DUAL ",
            "EXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 42.",
        );
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("ORA"));
        assert_eq!(rec.sql, "SELECT 1 FROM DUAL ");
    }

    #[test]
    fn performance_metrics_non_ora_tag_keeps_prefix_intact() {
        let raw = build_record("[SEL] : SELECT 1 ", "EXEC_ID: 7.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("SEL"));
        assert_eq!(rec.sql, ": SELECT 1 ");
    }

    #[test]
    fn performance_metrics_no_tag_keeps_prefix_intact() {
        let raw = build_record(": SELECT 1 ", "EXEC_ID: 7.");
        let rec = parse_record(&raw).unwrap();
        assert!(rec.tag.is_none());
        assert_eq!(rec.sql, ": SELECT 1 ");
    }

    #[test]
    fn performance_metrics_exectime_only() {
        let raw = build_record("DELETE FROM T; ", "EXECTIME: 3.5(ms)");
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 3.5).abs() < 1e-6);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.sql, "DELETE FROM T; ");
    }

    #[test]
    fn performance_metrics_rowcount_only() {
        let raw = build_record("UPDATE T SET A=1; ", "ROWCOUNT: 10(rows)");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 10);
        assert_eq!(rec.exec_id, 0);
    }

    #[test]
    fn performance_metrics_exec_id_only() {
        let raw = build_record("SELECT 1; ", "EXEC_ID: 42.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 42);
    }

    #[test]
    fn performance_metrics_ora_tag_only_colon_space_sql_empty_after_strip() {
        let raw = build_record("[ORA] : ", "EXEC_ID: 1.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("ORA"));
        assert_eq!(rec.sql, "");
    }

    #[test]
    fn early_exit_no_dot_suffix() {
        let raw = build_record("SELECT * FROM users WHERE id = 1;", "");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 0);
    }

    #[test]
    fn dot_suffix_no_real_indicators_guarded() {
        let raw = build_record("SELECT url FROM t WHERE url = 'http://example.com'.", "");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.exectime, 0.0);
    }

    #[test]
    fn dot_suffix_with_real_indicators() {
        let raw = build_record(
            "SELECT 1 FROM T ",
            "EXECTIME: 2.5(ms) ROWCOUNT: 5(rows) EXEC_ID: 77.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 2.5).abs() < 1e-6);
        assert_eq!(rec.rowcount, 5);
        assert_eq!(rec.exec_id, 77);
        assert_eq!(rec.sql, "SELECT 1 FROM T ");
    }

    #[test]
    fn fake_keyword_in_body_plus_real_indicators() {
        let raw = build_record(
            "SELECT 'EXECTIME: fake' FROM T ",
            "EXECTIME: 1.0(ms) ROWCOUNT: 3(rows) EXEC_ID: 55.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 1.0).abs() < 1e-6);
        assert_eq!(rec.rowcount, 3);
        assert_eq!(rec.exec_id, 55);
        assert!(rec.sql.contains("EXECTIME: fake"));
    }

    #[test]
    fn multiple_colons_in_body() {
        let raw = build_record(
            "SELECT 'http://example.com:8080/path' FROM T ",
            "EXECTIME: 3.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 99.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 3.0).abs() < 1e-6);
        assert_eq!(rec.rowcount, 1);
        assert_eq!(rec.exec_id, 99);
        assert!(rec.sql.contains("http://example.com:8080/path"));
    }

    #[test]
    fn exec_id_only_split_correct() {
        let raw = build_record("INSERT INTO T VALUES (1); ", "EXEC_ID: 123.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exec_id, 123);
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.sql, "INSERT INTO T VALUES (1); ");
    }

    #[test]
    fn body_without_indicators() {
        let raw = build_record("SELECT 1;", "");
        let rec = parse_record(&raw).expect("parse ok");
        assert_eq!(rec.sql, "SELECT 1;");
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
    }

    #[test]
    fn indicators_exec_id_only() {
        let raw = build_record("SELECT 1; ", "EXEC_ID: 42.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "SELECT 1; ");
        assert_eq!(rec.exec_id, 42);
    }

    #[test]
    fn indicators_rowcount_only() {
        let raw = build_record("UPDATE T SET A=1; ", "ROWCOUNT: 10(rows)");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "UPDATE T SET A=1; ");
        assert_eq!(rec.rowcount, 10);
    }

    #[test]
    fn indicators_exectime_only() {
        let raw = build_record("DELETE FROM T; ", "EXECTIME: 3.5(ms)");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "DELETE FROM T; ");
        assert!((rec.exectime - 3.5).abs() < 1e-6);
    }

    #[test]
    fn indicators_permutation_all() {
        let tail = "ROWCOUNT: 5(rows) EXECTIME: 12.25(ms) EXEC_ID: 999.";
        let raw = build_record("SELECT * FROM T ", tail);
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "SELECT * FROM T ");
        assert_eq!(rec.rowcount, 5);
        assert!((rec.exectime - 12.25).abs() < 1e-6);
        assert_eq!(rec.exec_id, 999);
    }

    // ---- meta section ---------------------------------------------------------

    #[test]
    fn meta_parsing_basic() {
        let raw = b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:SYSDBA trxid:0 stmt:0x2 appname:cli) SELECT";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.ep, 2);
        assert_eq!(rec.sess_id, "0xABC");
        assert_eq!(rec.thrd_id, "777");
        assert_eq!(rec.username, "SYSDBA");
        assert_eq!(rec.trxid, "0");
        assert_eq!(rec.statement, "0x2");
        assert_eq!(rec.appname, "cli");
    }

    #[test]
    fn meta_parsing_empty_appname() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "");
    }

    #[test]
    fn appname_empty_followed_by_ip_colon_single_should_keep_appname_empty() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: ip:10.1.1.1) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "");
        assert_eq!(rec.client_ip, "10.1.1.1");
    }

    #[test]
    fn appname_empty_followed_by_ip_triple_colon_should_keep_appname_empty() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: ip:::ffff:10.3.100.68) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "");
        assert_eq!(rec.client_ip, "::ffff:10.3.100.68");
    }

    #[test]
    fn meta_parsing_gb18030_username() {
        let username = "用户";

        let mut raw: Vec<u8> = b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
        raw.extend_from_slice(&encode_gb18030(username));
        raw.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT");

        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.username, username);
    }

    #[test]
    fn tag_extraction_and_body_trim() {
        let raw = b"2025-11-17 16:09:41.123 (EP[1] sess:123 thrd:456 user:u trxid:3 stmt:4 appname:bench) [SEL] SELECT 1; EXEC_ID: 42.";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("SEL"));
        assert_eq!(rec.sql, "SELECT 1; ");
    }

    #[test]
    #[cfg(not(miri))]
    fn file_encoding_detection_gb18030() {
        let username = "用户";

        let mut line: Vec<u8> =
            b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
        line.extend_from_slice(&encode_gb18030(username));
        line.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT\n");

        let tmp = temp_log_with(&line);

        let parser = LogParserBuilder::new(tmp.path()).build().expect("open");
        let rec = parser.iter().next().unwrap().unwrap();
        assert_eq!(rec.username, username);
    }

    // ---- indicator keywords appearing inside the SQL body -------------------------

    #[test]
    fn find_indicators_split_exectime_keyword_in_sql_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT * FROM t WHERE col = 'EXECTIME: slow'\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "SELECT * FROM t WHERE col = 'EXECTIME: slow'\n");
        assert_eq!(record.exec_id, 0);
        assert_eq!(record.exectime, 0.0);
    }

    #[test]
    fn find_indicators_split_rowcount_keyword_in_sql_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT * FROM t WHERE cnt = 'ROWCOUNT: many'\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "SELECT * FROM t WHERE cnt = 'ROWCOUNT: many'\n");
        assert_eq!(record.rowcount, 0);
    }

    #[test]
    fn find_indicators_split_exec_id_keyword_in_sql_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT EXEC_ID: foo FROM dual\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "SELECT EXEC_ID: foo FROM dual\n");
        assert_eq!(record.exec_id, 0);
    }

    #[test]
    fn find_indicators_split_keyword_in_body_plus_real_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT EXECTIME: slow\nEXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 99.\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert!((record.exectime - 5.0).abs() < 1e-6);
        assert!(record.sql.contains("SELECT"));
    }

    #[test]
    fn find_indicators_split_multiple_keywords_in_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) EXECTIME: x ROWCOUNT: y EXEC_ID: z\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "EXECTIME: x ROWCOUNT: y EXEC_ID: z\n");
        assert_eq!(record.exec_id, 0);
        assert_eq!(record.exectime, 0.0);
    }

    // ---- file-level encoding detection ----------------------------------------

    #[test]
    #[cfg(not(miri))]
    fn encoding_detection_gb18030_after_64kb_boundary() {
        let ascii_record = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:ascii trxid:0 stmt:0 appname:app) SELECT 1;\n";
        // Enough pure-ASCII records to push the GB18030 line past 64 KiB.
        let repeat_count = 65536 / ascii_record.len() + 2;

        let username = "用户";
        let mut contents = ascii_record.repeat(repeat_count).into_bytes();
        contents.extend_from_slice(b"2025-11-17 16:09:42.000 (EP[0] sess:2 thrd:2 user:");
        contents.extend_from_slice(&encode_gb18030(username));
        contents.extend_from_slice(b" trxid:0 stmt:0 appname:app) SELECT 2;\n");

        let tmp = temp_log_with(&contents);

        let parser = LogParserBuilder::new(tmp.path()).build().unwrap();
        let last = parser.iter().last().unwrap().unwrap();
        assert_eq!(last.username, username);
    }

    #[test]
    #[cfg(not(miri))]
    fn file_encoding_detection_utf8() {
        let username = "用户";

        let mut line: Vec<u8> =
            b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
        line.extend_from_slice(username.as_bytes());
        line.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT\n");

        let tmp = temp_log_with(&line);

        let parser = LogParserBuilder::new(tmp.path()).build().expect("open");
        let rec = parser.iter().next().unwrap().unwrap();
        assert_eq!(rec.username, username);
    }

    // ---- structural edge cases ----------------------------------------------------

    #[test]
    fn parse_record_single_line_no_newline() {
        let raw =
            b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:U trxid:3 stmt:4 appname:a) SELECT 1";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.ts, "2025-11-17 16:09:41.123");
        assert!(rec.sql.contains("SELECT"));
    }

    #[test]
    fn parse_record_no_meta_open_paren() {
        let raw = b"2025-11-17 16:09:41.123 NO_OPEN_PAREN_AT_ALL_HERE body";
        assert!(parse_record(raw).is_err());
    }

    #[test]
    fn parse_record_no_meta_close_paren() {
        let raw = b"2025-11-17 16:09:41.123 (UNCLOSED_META body";
        assert!(parse_record(raw).is_err());
    }

    #[test]
    fn meta_closing_paren_without_space_then_body_on_next_line() {
        let content = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:app)\nSELECT * FROM T\nEXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 7.\n";
        let rec = parse_record(content).expect("parse ok");
        assert!(rec.sql.trim_start().starts_with("SELECT * FROM T"));
        assert_eq!(rec.exec_id, 7);
    }

    #[test]
    fn appname_empty_then_take_next_token_as_appname_not_ip() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: [SEL] ip:::ffff:10.0.0.1) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "[SEL]");
        assert_eq!(rec.client_ip, "::ffff:10.0.0.1");
    }

    #[test]
    fn indicators_not_strictly_formatted_should_not_split_body() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:app) SELECT 1; EXEC_ID:123";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.exec_id, 0);
        assert!(rec.sql.ends_with("EXEC_ID:123"));
    }

    #[test]
    fn test_parse_record_timestamp_validation() {
        let valid = b"2025-11-17 16:09:41.123 (EP[0]) SELECT";
        assert!(parse_record(valid).is_ok());

        let bad_ts_no_meta = b"2025-11-17 16:09:41.123 INVALID NO META";
        assert!(matches!(
            parse_record(bad_ts_no_meta),
            Err(ParseError::InvalidFormat { .. })
        ));

        let short = b"2025-11-17 16:0";
        assert!(matches!(
            parse_record(short),
            Err(ParseError::InvalidFormat { .. })
        ));
    }
}