1pub mod builder;
2pub(crate) mod encoding;
3pub mod iterator;
4
5pub use builder::LogParserBuilder;
6pub use encoding::FileEncodingHint;
7pub use iterator::LogIterator;
8
9use memchr::memmem::Finder;
10use memchr::{memchr, memrchr};
11use std::fs::File;
12use std::path::PathBuf;
13use std::str;
14use std::sync::LazyLock;
15
16use crate::error::ParseError;
17use crate::record::{self, Sqllog};
18use ::encoding::all::GB18030;
19use ::encoding::{DecoderTrap, Encoding};
20
21static FINDER_CLOSE_META: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b") "));
23
24pub struct LogParser {
29 pub(super) path: PathBuf,
30 pub(super) encoding: FileEncodingHint,
31}
32
33impl LogParser {
34 pub fn iter(&self) -> Result<LogIterator, ParseError> {
36 let file = File::open(&self.path).map_err(|e| ParseError::IoError(e.to_string()))?;
37 Ok(LogIterator::new(file, self.encoding))
38 }
39}
40
/// Test-only convenience wrapper around [`parse_record_with_hint`].
///
/// Parses `record_bytes` with the `Auto` encoding hint and reports line
/// number `0` in any resulting error.
#[cfg(test)]
pub(crate) fn parse_record(record_bytes: &[u8]) -> Result<Sqllog, ParseError> {
    let hint = FileEncodingHint::Auto;
    let reported_line: u64 = 0;
    parse_record_with_hint(record_bytes, hint, reported_line)
}
48
49pub(super) fn parse_record_with_hint(
51 record_bytes: &[u8],
52 encoding_hint: FileEncodingHint,
53 line_number: u64,
54) -> Result<Sqllog, ParseError> {
55 let is_multiline = memchr(b'\n', record_bytes).is_some();
57
58 let first_line = if is_multiline {
60 match memchr(b'\n', record_bytes) {
61 Some(idx) => {
62 let mut line = &record_bytes[..idx];
63 if line.ends_with(b"\r") {
64 line = &line[..line.len() - 1];
65 }
66 line
67 }
68 None => {
69 let mut line = record_bytes;
70 if line.ends_with(b"\r") {
71 line = &line[..line.len() - 1];
72 }
73 line
74 }
75 }
76 } else {
77 let mut line = record_bytes;
78 if line.ends_with(b"\r") {
79 line = &line[..line.len() - 1];
80 }
81 line
82 };
83
84 if first_line.len() < 23 {
86 return Err(make_invalid_format_error(first_line, line_number));
87 }
88 let ts = match str::from_utf8(&first_line[0..23]) {
89 Ok(s) => s.to_string(),
90 Err(_) => return Err(make_invalid_format_error(first_line, line_number)),
91 };
92
93 let meta_start = match memchr(b'(', &first_line[23..]) {
95 Some(idx) => 23 + idx,
96 None => return Err(make_invalid_format_error(first_line, line_number)),
97 };
98
99 let meta_end = match FINDER_CLOSE_META.find(&first_line[meta_start..]) {
100 Some(idx) => Some(meta_start + idx),
101 None => memrchr(b')', &first_line[meta_start..]).map(|idx| meta_start + idx),
102 };
103
104 let meta_end = match meta_end {
105 Some(idx) => idx,
106 None => return Err(make_invalid_format_error(first_line, line_number)),
107 };
108
109 let meta_bytes = &first_line[meta_start + 1..meta_end];
110
111 let (ep, sess_id, thrd_id, username, trxid, statement, appname, client_ip) = match encoding_hint
113 {
114 FileEncodingHint::Utf8 => record::parse_meta_from_bytes(meta_bytes),
115 FileEncodingHint::Auto => {
116 match str::from_utf8(meta_bytes) {
118 Ok(_) => record::parse_meta_from_bytes(meta_bytes),
119 Err(_) => match GB18030.decode(meta_bytes, DecoderTrap::Strict) {
120 Ok(decoded) => record::parse_meta_from_bytes(decoded.as_bytes()),
121 Err(_) => {
122 let lossy = String::from_utf8_lossy(meta_bytes).into_owned();
123 record::parse_meta_from_bytes(lossy.as_bytes())
124 }
125 },
126 }
127 }
128 FileEncodingHint::Gb18030 => match GB18030.decode(meta_bytes, DecoderTrap::Strict) {
129 Ok(decoded) => record::parse_meta_from_bytes(decoded.as_bytes()),
130 Err(_) => {
131 let lossy = String::from_utf8_lossy(meta_bytes).into_owned();
132 record::parse_meta_from_bytes(lossy.as_bytes())
133 }
134 },
135 };
136
137 let body_start_in_first_line = meta_end + 1;
139
140 let content_start = if body_start_in_first_line < first_line.len()
141 && first_line[body_start_in_first_line] == b' '
142 {
143 body_start_in_first_line + 1
144 } else {
145 body_start_in_first_line
146 };
147
148 let mut tag: Option<String> = None;
150 let content_slice = if content_start < record_bytes.len() {
151 let mut s = &record_bytes[content_start..];
152 if !s.is_empty()
153 && s[0] == b'['
154 && let Some(end_idx) = memchr(b']', s)
155 && end_idx >= 1
156 {
157 let inner = &s[1..end_idx];
158 if !inner.contains(&b' ') && inner.len() <= 32 {
159 tag = match encoding_hint {
160 FileEncodingHint::Utf8 => str::from_utf8(inner).ok().map(|t| t.to_string()),
161 FileEncodingHint::Auto => str::from_utf8(inner)
162 .ok()
163 .map(|t| t.to_string())
164 .or_else(|| GB18030.decode(inner, DecoderTrap::Strict).ok()),
165 FileEncodingHint::Gb18030 => GB18030
166 .decode(inner, DecoderTrap::Strict)
167 .ok()
168 .or_else(|| str::from_utf8(inner).ok().map(|s| s.to_string())),
169 };
170 s = &s[end_idx + 1..];
172 let mut skip = 0usize;
173 while skip < s.len() && s[skip].is_ascii_whitespace() {
174 skip += 1;
175 }
176 s = &s[skip..];
177 }
178 }
179 s
180 } else {
181 &[] as &[u8]
182 };
183
184 let split = record::find_indicators_split(content_slice);
186 let body_bytes = &content_slice[..split];
187 let ind_bytes = &content_slice[split..];
188
189 let sql_raw = match encoding_hint {
191 FileEncodingHint::Utf8 => String::from_utf8_lossy(body_bytes).into_owned(),
192 FileEncodingHint::Auto => match str::from_utf8(body_bytes) {
193 Ok(s) => s.to_string(),
194 Err(_) => match GB18030.decode(body_bytes, DecoderTrap::Strict) {
195 Ok(s) => s,
196 Err(_) => String::from_utf8_lossy(body_bytes).into_owned(),
197 },
198 },
199 FileEncodingHint::Gb18030 => match GB18030.decode(body_bytes, DecoderTrap::Strict) {
200 Ok(s) => s,
201 Err(_) => String::from_utf8_lossy(body_bytes).into_owned(),
202 },
203 };
204
205 let sql = if tag.as_deref() == Some("ORA") {
207 sql_raw.strip_prefix(": ").unwrap_or(&sql_raw).to_string()
208 } else {
209 sql_raw
210 };
211
212 let (exectime, rowcount, exec_id) = record::parse_indicators_from_bytes(ind_bytes);
214
215 Ok(Sqllog {
216 ts,
217 tag,
218 ep,
219 sess_id,
220 thrd_id,
221 username,
222 trxid,
223 statement,
224 appname,
225 client_ip,
226 sql,
227 exectime,
228 rowcount,
229 exec_id,
230 })
231}
232
233#[cold]
234fn make_invalid_format_error(raw_bytes: &[u8], line_number: u64) -> ParseError {
235 ParseError::InvalidFormat {
236 raw: String::from_utf8_lossy(raw_bytes).to_string(),
237 line_number,
238 }
239}
240
/// Unit tests for record parsing and for the builder/iterator entry points.
#[cfg(test)]
mod tests {
    use super::*;

    /// Record header shared by the record-building helpers. It ends with the
    /// `") "` that closes the metadata section, so anything appended to it
    /// becomes the record content.
    const BENCH_HEADER: &[u8] =
        b"2025-11-17 16:09:41.123 (EP[1] sess:123 thrd:456 user:alice trxid:789 stmt:0x1 appname:bench) ";

    // ---- builder / iterator round trips (need real files, so skipped under miri) ----

    #[cfg(not(miri))]
    #[test]
    fn test_builder_encoding_hint_utf8() {
        use std::io::Write;
        use tempfile::NamedTempFile;

        let mut tmp = NamedTempFile::new().expect("tmp");
        write!(
            tmp,
            "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:a) SELECT 1"
        )
        .unwrap();
        tmp.as_file().sync_all().unwrap();

        let parser = LogParserBuilder::new(tmp.path())
            .encoding_hint(FileEncodingHint::Utf8)
            .build()
            .expect("build");
        let record = parser.iter().unwrap().next().unwrap().unwrap();
        assert_eq!(record.ts, "2025-11-17 16:09:41.123");
        assert!(record.sql.contains("SELECT 1"));
    }

    #[cfg(not(miri))]
    #[test]
    fn test_builder_file_not_found() {
        let result = LogParserBuilder::new("/nonexistent/path.log").build();
        assert!(result.is_err());
        match result {
            Err(ParseError::IoError(_)) => {}
            _ => panic!("Expected IoError on nonexistent file"),
        }
    }

    /// Builds a single-line record: the shared header, then `tag_and_body`, then `tail`.
    fn build_perf_record(tag_and_body: &str, tail: &str) -> Vec<u8> {
        [BENCH_HEADER, tag_and_body.as_bytes(), tail.as_bytes()].concat()
    }

    // ---- indicator extraction and ORA prefix handling ----

    #[test]
    fn performance_metrics_full() {
        let raw = build_perf_record(
            "SELECT * FROM T ",
            "EXECTIME: 10.5(ms) ROWCOUNT: 100(rows) EXEC_ID: 999.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 10.5).abs() < 1e-6);
        assert_eq!(rec.rowcount, 100);
        assert_eq!(rec.exec_id, 999);
        assert_eq!(rec.sql, "SELECT * FROM T ");
    }

    #[test]
    fn performance_metrics_no_indicators() {
        let raw = build_perf_record("SELECT 1;", "");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.sql, "SELECT 1;");
    }

    #[test]
    fn performance_metrics_ora_tag_strips_colon_space_prefix() {
        let raw = build_perf_record(
            "[ORA] : SELECT 1 FROM DUAL ",
            "EXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 42.",
        );
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("ORA"));
        assert_eq!(rec.sql, "SELECT 1 FROM DUAL ");
        assert!((rec.exectime - 5.0).abs() < 1e-6);
        assert_eq!(rec.rowcount, 1);
        assert_eq!(rec.exec_id, 42);
    }

    #[test]
    fn performance_metrics_ora_tag_no_prefix_unchanged() {
        let raw = build_perf_record(
            "[ORA] SELECT 1 FROM DUAL ",
            "EXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 42.",
        );
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("ORA"));
        assert_eq!(rec.sql, "SELECT 1 FROM DUAL ");
    }

    #[test]
    fn performance_metrics_non_ora_tag_keeps_prefix_intact() {
        let raw = build_perf_record("[SEL] : SELECT 1 ", "EXEC_ID: 7.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("SEL"));
        assert_eq!(rec.sql, ": SELECT 1 ");
    }

    #[test]
    fn performance_metrics_no_tag_keeps_prefix_intact() {
        let raw = build_perf_record(": SELECT 1 ", "EXEC_ID: 7.");
        let rec = parse_record(&raw).unwrap();
        assert!(rec.tag.is_none());
        assert_eq!(rec.sql, ": SELECT 1 ");
    }

    #[test]
    fn performance_metrics_exectime_only() {
        let raw = build_perf_record("DELETE FROM T; ", "EXECTIME: 3.5(ms)");
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 3.5).abs() < 1e-6);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.sql, "DELETE FROM T; ");
    }

    #[test]
    fn performance_metrics_rowcount_only() {
        let raw = build_perf_record("UPDATE T SET A=1; ", "ROWCOUNT: 10(rows)");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 10);
        assert_eq!(rec.exec_id, 0);
    }

    #[test]
    fn performance_metrics_exec_id_only() {
        let raw = build_perf_record("SELECT 1; ", "EXEC_ID: 42.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 42);
    }

    #[test]
    fn performance_metrics_ora_tag_only_colon_space_sql_empty_after_strip() {
        let raw = build_perf_record("[ORA] : ", "EXEC_ID: 1.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("ORA"));
        assert_eq!(rec.sql, "");
    }

    #[test]
    fn early_exit_no_dot_suffix() {
        let raw = build_perf_record("SELECT * FROM users WHERE id = 1;", "");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 0);
    }

    #[test]
    fn dot_suffix_no_real_indicators_guarded() {
        let raw = build_perf_record("SELECT url FROM t WHERE url = 'http://example.com'.", "");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.exectime, 0.0);
    }

    #[test]
    fn dot_suffix_with_real_indicators() {
        let raw = build_perf_record(
            "SELECT 1 FROM T ",
            "EXECTIME: 2.5(ms) ROWCOUNT: 5(rows) EXEC_ID: 77.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 2.5).abs() < 1e-6);
        assert_eq!(rec.rowcount, 5);
        assert_eq!(rec.exec_id, 77);
        assert_eq!(rec.sql, "SELECT 1 FROM T ");
    }

    #[test]
    fn fake_keyword_in_body_plus_real_indicators() {
        let raw = build_perf_record(
            "SELECT 'EXECTIME: fake' FROM T ",
            "EXECTIME: 1.0(ms) ROWCOUNT: 3(rows) EXEC_ID: 55.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 1.0).abs() < 1e-6);
        assert_eq!(rec.rowcount, 3);
        assert_eq!(rec.exec_id, 55);
        assert!(rec.sql.contains("EXECTIME: fake"));
    }

    #[test]
    fn multiple_colons_in_body() {
        let raw = build_perf_record(
            "SELECT 'http://example.com:8080/path' FROM T ",
            "EXECTIME: 3.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 99.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 3.0).abs() < 1e-6);
        assert_eq!(rec.rowcount, 1);
        assert_eq!(rec.exec_id, 99);
        assert!(rec.sql.contains("http://example.com:8080/path"));
    }

    #[test]
    fn exec_id_only_split_correct() {
        let raw = build_perf_record("INSERT INTO T VALUES (1); ", "EXEC_ID: 123.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exec_id, 123);
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.sql, "INSERT INTO T VALUES (1); ");
    }

    /// Same construction as [`build_perf_record`]; kept under its own name for the
    /// tests below and implemented by delegation so the two cannot drift apart.
    fn build_additional_record(line1_body: &str, tail: &str) -> Vec<u8> {
        build_perf_record(line1_body, tail)
    }

    #[test]
    fn body_without_indicators() {
        let raw = build_additional_record("SELECT 1;", "");
        let rec = parse_record(&raw).expect("parse ok");
        assert_eq!(rec.sql, "SELECT 1;");
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
    }

    #[test]
    fn indicators_exec_id_only() {
        let raw = build_additional_record("SELECT 1; ", "EXEC_ID: 42.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "SELECT 1; ");
        assert_eq!(rec.exec_id, 42);
    }

    #[test]
    fn indicators_rowcount_only() {
        let raw = build_additional_record("UPDATE T SET A=1; ", "ROWCOUNT: 10(rows)");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "UPDATE T SET A=1; ");
        assert_eq!(rec.rowcount, 10);
    }

    #[test]
    fn indicators_exectime_only() {
        let raw = build_additional_record("DELETE FROM T; ", "EXECTIME: 3.5(ms)");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "DELETE FROM T; ");
        assert!((rec.exectime - 3.5).abs() < 1e-6);
    }

    #[test]
    fn indicators_permutation_all() {
        let tail = "ROWCOUNT: 5(rows) EXECTIME: 12.25(ms) EXEC_ID: 999.";
        let raw = build_additional_record("SELECT * FROM T ", tail);
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "SELECT * FROM T ");
        assert_eq!(rec.rowcount, 5);
        assert!((rec.exectime - 12.25).abs() < 1e-6);
        assert_eq!(rec.exec_id, 999);
    }

    // ---- metadata parsing ----

    #[test]
    fn meta_parsing_basic() {
        let raw = b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:SYSDBA trxid:0 stmt:0x2 appname:cli) SELECT";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.ep, 2);
        assert_eq!(rec.sess_id, "0xABC");
        assert_eq!(rec.thrd_id, "777");
        assert_eq!(rec.username, "SYSDBA");
        assert_eq!(rec.trxid, "0");
        assert_eq!(rec.statement, "0x2");
        assert_eq!(rec.appname, "cli");
    }

    #[test]
    fn meta_parsing_empty_appname() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "");
    }

    #[test]
    fn appname_empty_followed_by_ip_colon_single_should_keep_appname_empty() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: ip:10.1.1.1) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "");
        assert_eq!(rec.client_ip, "10.1.1.1");
    }

    #[test]
    fn appname_empty_followed_by_ip_triple_colon_should_keep_appname_empty() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: ip:::ffff:10.3.100.68) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "");
        assert_eq!(rec.client_ip, "::ffff:10.3.100.68");
    }

    #[test]
    fn meta_parsing_gb18030_username() {
        use ::encoding::all::GB18030;
        use ::encoding::{EncoderTrap, Encoding};

        let username = "用户";
        let user_bytes = GB18030
            .encode(username, EncoderTrap::Strict)
            .expect("encode");

        let mut raw: Vec<u8> = b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
        raw.extend_from_slice(&user_bytes);
        raw.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT");

        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.username, username);
    }

    #[test]
    fn tag_extraction_and_body_trim() {
        let raw = b"2025-11-17 16:09:41.123 (EP[1] sess:123 thrd:456 user:u trxid:3 stmt:4 appname:bench) [SEL] SELECT 1; EXEC_ID: 42.";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("SEL"));
        assert_eq!(rec.sql, "SELECT 1; ");
    }

    #[test]
    #[cfg(not(miri))]
    fn file_encoding_detection_gb18030() {
        use ::encoding::all::GB18030;
        use ::encoding::{EncoderTrap, Encoding};
        use std::io::Write;
        use tempfile::NamedTempFile;

        let username = "用户";
        let user_bytes = GB18030
            .encode(username, EncoderTrap::Strict)
            .expect("encode");

        let mut line: Vec<u8> =
            b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
        line.extend_from_slice(&user_bytes);
        line.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT\n");

        let mut tmp = NamedTempFile::new().expect("tmp");
        tmp.write_all(&line).expect("write");
        tmp.as_file().sync_all().expect("sync");

        let parser = LogParserBuilder::new(tmp.path()).build().expect("open");
        let rec = parser.iter().unwrap().next().unwrap().unwrap();
        assert_eq!(rec.username, username);
    }

    // ---- indicator keywords that appear inside the SQL body ----

    #[test]
    fn find_indicators_split_exectime_keyword_in_sql_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT * FROM t WHERE col = 'EXECTIME: slow'\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "SELECT * FROM t WHERE col = 'EXECTIME: slow'\n");
        assert_eq!(record.exec_id, 0);
        assert_eq!(record.exectime, 0.0);
    }

    #[test]
    fn find_indicators_split_rowcount_keyword_in_sql_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT * FROM t WHERE cnt = 'ROWCOUNT: many'\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "SELECT * FROM t WHERE cnt = 'ROWCOUNT: many'\n");
        assert_eq!(record.rowcount, 0);
    }

    #[test]
    fn find_indicators_split_exec_id_keyword_in_sql_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT EXEC_ID: foo FROM dual\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "SELECT EXEC_ID: foo FROM dual\n");
        assert_eq!(record.exec_id, 0);
    }

    #[test]
    fn find_indicators_split_keyword_in_body_plus_real_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT EXECTIME: slow\nEXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 99.\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert!((record.exectime - 5.0).abs() < 1e-6);
        assert!(record.sql.contains("SELECT"));
    }

    #[test]
    fn find_indicators_split_multiple_keywords_in_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) EXECTIME: x ROWCOUNT: y EXEC_ID: z\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "EXECTIME: x ROWCOUNT: y EXEC_ID: z\n");
        assert_eq!(record.exec_id, 0);
        assert_eq!(record.exectime, 0.0);
    }

    // ---- encoding detection through the file-based API ----

    #[test]
    #[cfg(not(miri))]
    fn encoding_detection_gb18030_after_64kb_boundary() {
        use ::encoding::all::GB18030;
        use ::encoding::{EncoderTrap, Encoding};
        use std::io::Write;
        use tempfile::NamedTempFile;

        // Enough pure-ASCII records to push the GB18030 record past the first 64 KiB.
        let ascii_record = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:ascii trxid:0 stmt:0 appname:app) SELECT 1;\n";
        let repeat_count = 65536 / ascii_record.len() + 2;

        let username = "用户";
        let user_bytes = GB18030.encode(username, EncoderTrap::Strict).unwrap();
        let mut gb_line: Vec<u8> = b"2025-11-17 16:09:42.000 (EP[0] sess:2 thrd:2 user:".to_vec();
        gb_line.extend_from_slice(&user_bytes);
        gb_line.extend_from_slice(b" trxid:0 stmt:0 appname:app) SELECT 2;\n");

        let mut tmp = NamedTempFile::new().unwrap();
        for _ in 0..repeat_count {
            tmp.write_all(ascii_record.as_bytes()).unwrap();
        }
        tmp.write_all(&gb_line).unwrap();
        tmp.as_file().sync_all().unwrap();

        let parser = LogParserBuilder::new(tmp.path()).build().unwrap();
        let records: Vec<_> = parser.iter().unwrap().collect();
        let last = records.last().unwrap().as_ref().unwrap();
        assert_eq!(last.username, username);
    }

    #[test]
    #[cfg(not(miri))]
    fn file_encoding_detection_utf8() {
        use std::io::Write;
        use tempfile::NamedTempFile;

        let username = "用户";
        let user_bytes = username.as_bytes();

        let mut line: Vec<u8> =
            b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
        line.extend_from_slice(user_bytes);
        line.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT\n");

        let mut tmp = NamedTempFile::new().expect("tmp");
        tmp.write_all(&line).expect("write");
        tmp.as_file().sync_all().expect("sync");

        let parser = LogParserBuilder::new(tmp.path()).build().expect("open");
        let rec = parser.iter().unwrap().next().unwrap().unwrap();
        assert_eq!(rec.username, username);
    }

    // ---- header edge cases and malformed input ----

    #[test]
    fn parse_record_single_line_no_newline() {
        let raw =
            b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:U trxid:3 stmt:4 appname:a) SELECT 1";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.ts, "2025-11-17 16:09:41.123");
        assert!(rec.sql.contains("SELECT"));
    }

    #[test]
    fn parse_record_no_meta_open_paren() {
        let raw = b"2025-11-17 16:09:41.123 NO_OPEN_PAREN_AT_ALL_HERE body";
        let result = parse_record(raw);
        assert!(result.is_err());
    }

    #[test]
    fn parse_record_no_meta_close_paren() {
        let raw = b"2025-11-17 16:09:41.123 (UNCLOSED_META body";
        let result = parse_record(raw);
        assert!(result.is_err());
    }

    #[test]
    fn meta_closing_paren_without_space_then_body_on_next_line() {
        let content = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:app)\nSELECT * FROM T\nEXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 7.\n";
        let rec = parse_record(content).expect("parse ok");
        assert!(rec.sql.trim_start().starts_with("SELECT * FROM T"));
        assert_eq!(rec.exec_id, 7);
    }

    #[test]
    fn appname_empty_then_take_next_token_as_appname_not_ip() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: [SEL] ip:::ffff:10.0.0.1) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "[SEL]");
        assert_eq!(rec.client_ip, "::ffff:10.0.0.1");
    }

    #[test]
    fn indicators_not_strictly_formatted_should_not_split_body() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:app) SELECT 1; EXEC_ID:123";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.exec_id, 0);
        assert!(rec.sql.ends_with("EXEC_ID:123"));
    }

    #[test]
    fn test_parse_record_timestamp_validation() {
        use crate::error::ParseError;

        let valid = b"2025-11-17 16:09:41.123 (EP[0]) SELECT";
        let result = parse_record(valid);
        assert!(result.is_ok());

        let bad_ts_no_meta = b"2025-11-17 16:09:41.123 INVALID NO META";
        let result = parse_record(bad_ts_no_meta);
        assert!(matches!(result, Err(ParseError::InvalidFormat { .. })));

        // Shorter than the 23-byte timestamp.
        let short = b"2025-11-17 16:0";
        let result = parse_record(short);
        assert!(matches!(result, Err(ParseError::InvalidFormat { .. })));
    }
}