Skip to main content

dm_database_parser_sqllog/parser/
mod.rs

1pub mod builder;
2pub(crate) mod encoding;
3pub mod iterator;
4
5pub use builder::LogParserBuilder;
6pub use encoding::FileEncodingHint;
7pub use iterator::LogIterator;
8
9use memchr::memmem::Finder;
10use memchr::{memchr, memrchr};
11use std::str;
12use std::sync::LazyLock;
13
14use crate::error::ParseError;
15use crate::record::{self, Sqllog};
16use ::encoding::all::GB18030;
17use ::encoding::{DecoderTrap, Encoding};
18
/// Pre-built substring searcher (SIMD-accelerated by `memchr` where available) for the
/// `") "` sequence that closes the metadata group of a record header.
///
/// Wrapped in a [`LazyLock`] so the searcher is constructed once, on first use, and then
/// shared by every subsequent parse instead of being rebuilt per record.
static FINDER_CLOSE_META: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b") "));
21
/// SQL log file parser.
///
/// Instances are created through [`LogParserBuilder`]. The parser keeps the entire file
/// contents in memory; the file encoding (UTF-8 or GB18030) is detected automatically.
pub struct LogParser {
    /// Raw bytes of the log file; borrowed by the iterator returned from `iter`.
    pub(super) data: Vec<u8>,
    /// Encoding hint handed to each iterator created from this parser.
    pub(super) encoding: FileEncodingHint,
}
30
31impl LogParser {
32    /// 返回顺序迭代器。
33    pub fn iter(&self) -> LogIterator<'_> {
34        LogIterator {
35            data: &self.data,
36            pos: 0,
37            encoding: self.encoding,
38            line_number: 1,
39        }
40    }
41}
42
/// Parses a single SQL log record from raw bytes.
///
/// Multi-line records are detected automatically. Meant for callers that already hold
/// one complete record; the encoding hint is `Auto` and the reported line number is 0.
#[cfg(test)]
pub(crate) fn parse_record(record_bytes: &[u8]) -> Result<Sqllog, ParseError> {
    let hint = FileEncodingHint::Auto;
    let line_number = 0;
    parse_record_with_hint(record_bytes, hint, line_number)
}
50
51/// 核心解析函数:从原始字节一次性解析全部字段到 Sqllog。
52pub(super) fn parse_record_with_hint(
53    record_bytes: &[u8],
54    encoding_hint: FileEncodingHint,
55    line_number: u64,
56) -> Result<Sqllog, ParseError> {
57    // 检测是否多行
58    let is_multiline = memchr(b'\n', record_bytes).is_some();
59
60    // 找到第一行
61    let first_line = if is_multiline {
62        match memchr(b'\n', record_bytes) {
63            Some(idx) => {
64                let mut line = &record_bytes[..idx];
65                if line.ends_with(b"\r") {
66                    line = &line[..line.len() - 1];
67                }
68                line
69            }
70            None => {
71                let mut line = record_bytes;
72                if line.ends_with(b"\r") {
73                    line = &line[..line.len() - 1];
74                }
75                line
76            }
77        }
78    } else {
79        let mut line = record_bytes;
80        if line.ends_with(b"\r") {
81            line = &line[..line.len() - 1];
82        }
83        line
84    };
85
86    // ── 1. 时间戳 ──
87    if first_line.len() < 23 {
88        return Err(make_invalid_format_error(first_line, line_number));
89    }
90    let ts = match str::from_utf8(&first_line[0..23]) {
91        Ok(s) => s.to_string(),
92        Err(_) => return Err(make_invalid_format_error(first_line, line_number)),
93    };
94
95    // ── 2. 元数据 ──
96    let meta_start = match memchr(b'(', &first_line[23..]) {
97        Some(idx) => 23 + idx,
98        None => return Err(make_invalid_format_error(first_line, line_number)),
99    };
100
101    let meta_end = match FINDER_CLOSE_META.find(&first_line[meta_start..]) {
102        Some(idx) => Some(meta_start + idx),
103        None => memrchr(b')', &first_line[meta_start..]).map(|idx| meta_start + idx),
104    };
105
106    let meta_end = match meta_end {
107        Some(idx) => idx,
108        None => return Err(make_invalid_format_error(first_line, line_number)),
109    };
110
111    let meta_bytes = &first_line[meta_start + 1..meta_end];
112
113    // 解析元数据(考虑编码)
114    let (ep, sess_id, thrd_id, username, trxid, statement, appname, client_ip) = match encoding_hint
115    {
116        FileEncodingHint::Utf8 => record::parse_meta_from_bytes(meta_bytes),
117        FileEncodingHint::Auto => {
118            // Auto: try UTF-8 first, then GB18030 fallback
119            match str::from_utf8(meta_bytes) {
120                Ok(_) => record::parse_meta_from_bytes(meta_bytes),
121                Err(_) => match GB18030.decode(meta_bytes, DecoderTrap::Strict) {
122                    Ok(decoded) => record::parse_meta_from_bytes(decoded.as_bytes()),
123                    Err(_) => {
124                        let lossy = String::from_utf8_lossy(meta_bytes).into_owned();
125                        record::parse_meta_from_bytes(lossy.as_bytes())
126                    }
127                },
128            }
129        }
130        FileEncodingHint::Gb18030 => match GB18030.decode(meta_bytes, DecoderTrap::Strict) {
131            Ok(decoded) => record::parse_meta_from_bytes(decoded.as_bytes()),
132            Err(_) => {
133                let lossy = String::from_utf8_lossy(meta_bytes).into_owned();
134                record::parse_meta_from_bytes(lossy.as_bytes())
135            }
136        },
137    };
138
139    // ── 3. Body 和 Indicators ──
140    let body_start_in_first_line = meta_end + 1;
141
142    let content_start = if body_start_in_first_line < first_line.len()
143        && first_line[body_start_in_first_line] == b' '
144    {
145        body_start_in_first_line + 1
146    } else {
147        body_start_in_first_line
148    };
149
150    // 提取可选的标签 [SEL] / [ORA]
151    let mut tag: Option<String> = None;
152    let content_slice = if content_start < record_bytes.len() {
153        let mut s = &record_bytes[content_start..];
154        if !s.is_empty()
155            && s[0] == b'['
156            && let Some(end_idx) = memchr(b']', s)
157            && end_idx >= 1
158        {
159            let inner = &s[1..end_idx];
160            if !inner.contains(&b' ') && inner.len() <= 32 {
161                tag = match encoding_hint {
162                    FileEncodingHint::Utf8 => str::from_utf8(inner).ok().map(|t| t.to_string()),
163                    FileEncodingHint::Auto => str::from_utf8(inner)
164                        .ok()
165                        .map(|t| t.to_string())
166                        .or_else(|| GB18030.decode(inner, DecoderTrap::Strict).ok()),
167                    FileEncodingHint::Gb18030 => GB18030
168                        .decode(inner, DecoderTrap::Strict)
169                        .ok()
170                        .or_else(|| str::from_utf8(inner).ok().map(|s| s.to_string())),
171                };
172                // 跳过 ']' 及后续空白
173                s = &s[end_idx + 1..];
174                let mut skip = 0usize;
175                while skip < s.len() && s[skip].is_ascii_whitespace() {
176                    skip += 1;
177                }
178                s = &s[skip..];
179            }
180        }
181        s
182    } else {
183        &[] as &[u8]
184    };
185
186    // 分割 body 和 indicators
187    let split = record::find_indicators_split(content_slice);
188    let body_bytes = &content_slice[..split];
189    let ind_bytes = &content_slice[split..];
190
191    // 解码 body
192    let sql_raw = match encoding_hint {
193        FileEncodingHint::Utf8 => String::from_utf8_lossy(body_bytes).into_owned(),
194        FileEncodingHint::Auto => match str::from_utf8(body_bytes) {
195            Ok(s) => s.to_string(),
196            Err(_) => match GB18030.decode(body_bytes, DecoderTrap::Strict) {
197                Ok(s) => s,
198                Err(_) => String::from_utf8_lossy(body_bytes).into_owned(),
199            },
200        },
201        FileEncodingHint::Gb18030 => match GB18030.decode(body_bytes, DecoderTrap::Strict) {
202            Ok(s) => s,
203            Err(_) => String::from_utf8_lossy(body_bytes).into_owned(),
204        },
205    };
206
207    // 处理 ORA 前缀
208    let sql = if tag.as_deref() == Some("ORA") {
209        sql_raw.strip_prefix(": ").unwrap_or(&sql_raw).to_string()
210    } else {
211        sql_raw
212    };
213
214    // 解析性能指标
215    let (exectime, rowcount, exec_id) = record::parse_indicators_from_bytes(ind_bytes);
216
217    Ok(Sqllog {
218        ts,
219        tag,
220        ep,
221        sess_id,
222        thrd_id,
223        username,
224        trxid,
225        statement,
226        appname,
227        client_ip,
228        sql,
229        exectime,
230        rowcount,
231        exec_id,
232    })
233}
234
235#[cold]
236fn make_invalid_format_error(raw_bytes: &[u8], line_number: u64) -> ParseError {
237    ParseError::InvalidFormat {
238        raw: String::from_utf8_lossy(raw_bytes).to_string(),
239        line_number,
240    }
241}
242
243// ── 测试 ────────────────────────────────────────────────────────────────────
244
245#[cfg(test)]
246mod tests {
247    use super::*;
248
249    #[cfg(not(miri))]
250    #[test]
251    fn test_builder_encoding_hint_utf8() {
252        use std::io::Write;
253        use tempfile::NamedTempFile;
254
255        let mut tmp = NamedTempFile::new().expect("tmp");
256        write!(
257            tmp,
258            "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:a) SELECT 1"
259        )
260        .unwrap();
261        tmp.as_file().sync_all().unwrap();
262
263        let parser = LogParserBuilder::new(tmp.path())
264            .encoding_hint(FileEncodingHint::Utf8)
265            .build()
266            .expect("build");
267        let record = parser.iter().next().unwrap().unwrap();
268        assert_eq!(record.ts, "2025-11-17 16:09:41.123");
269        assert!(record.sql.contains("SELECT 1"));
270    }
271
272    #[cfg(not(miri))]
273    #[test]
274    fn test_builder_file_not_found() {
275        let result = LogParserBuilder::new("/nonexistent/path.log").build();
276        assert!(result.is_err());
277        match result {
278            Err(ParseError::IoError(_)) => {}
279            _ => panic!("Expected IoError on nonexistent file"),
280        }
281    }
282
283    // ── 从 tests/performance_metrics.rs 迁入 ──────────────────────────────
284
285    fn build_perf_record(tag_and_body: &str, tail: &str) -> Vec<u8> {
286        let header =
287            b"2025-11-17 16:09:41.123 (EP[1] sess:123 thrd:456 user:alice trxid:789 stmt:0x1 appname:bench) ";
288        let mut v = Vec::new();
289        v.extend_from_slice(header);
290        v.extend_from_slice(tag_and_body.as_bytes());
291        if !tail.is_empty() {
292            v.extend_from_slice(tail.as_bytes());
293        }
294        v
295    }
296
297    #[test]
298    fn performance_metrics_full() {
299        let raw = build_perf_record(
300            "SELECT * FROM T ",
301            "EXECTIME: 10.5(ms) ROWCOUNT: 100(rows) EXEC_ID: 999.",
302        );
303        let rec = parse_record(&raw).unwrap();
304        assert!((rec.exectime - 10.5).abs() < 1e-6);
305        assert_eq!(rec.rowcount, 100);
306        assert_eq!(rec.exec_id, 999);
307        assert_eq!(rec.sql, "SELECT * FROM T ");
308    }
309
310    #[test]
311    fn performance_metrics_no_indicators() {
312        let raw = build_perf_record("SELECT 1;", "");
313        let rec = parse_record(&raw).unwrap();
314        assert_eq!(rec.exectime, 0.0);
315        assert_eq!(rec.rowcount, 0);
316        assert_eq!(rec.exec_id, 0);
317        assert_eq!(rec.sql, "SELECT 1;");
318    }
319
320    #[test]
321    fn performance_metrics_ora_tag_strips_colon_space_prefix() {
322        let raw = build_perf_record(
323            "[ORA] : SELECT 1 FROM DUAL ",
324            "EXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 42.",
325        );
326        let rec = parse_record(&raw).unwrap();
327        assert_eq!(rec.tag.as_deref(), Some("ORA"));
328        assert_eq!(rec.sql, "SELECT 1 FROM DUAL ");
329        assert!((rec.exectime - 5.0).abs() < 1e-6);
330        assert_eq!(rec.rowcount, 1);
331        assert_eq!(rec.exec_id, 42);
332    }
333
334    #[test]
335    fn performance_metrics_ora_tag_no_prefix_unchanged() {
336        let raw = build_perf_record(
337            "[ORA] SELECT 1 FROM DUAL ",
338            "EXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 42.",
339        );
340        let rec = parse_record(&raw).unwrap();
341        assert_eq!(rec.tag.as_deref(), Some("ORA"));
342        assert_eq!(rec.sql, "SELECT 1 FROM DUAL ");
343    }
344
345    #[test]
346    fn performance_metrics_non_ora_tag_keeps_prefix_intact() {
347        let raw = build_perf_record("[SEL] : SELECT 1 ", "EXEC_ID: 7.");
348        let rec = parse_record(&raw).unwrap();
349        assert_eq!(rec.tag.as_deref(), Some("SEL"));
350        assert_eq!(rec.sql, ": SELECT 1 ");
351    }
352
353    #[test]
354    fn performance_metrics_no_tag_keeps_prefix_intact() {
355        let raw = build_perf_record(": SELECT 1 ", "EXEC_ID: 7.");
356        let rec = parse_record(&raw).unwrap();
357        assert!(rec.tag.is_none());
358        assert_eq!(rec.sql, ": SELECT 1 ");
359    }
360
361    #[test]
362    fn performance_metrics_exectime_only() {
363        let raw = build_perf_record("DELETE FROM T; ", "EXECTIME: 3.5(ms)");
364        let rec = parse_record(&raw).unwrap();
365        assert!((rec.exectime - 3.5).abs() < 1e-6);
366        assert_eq!(rec.rowcount, 0);
367        assert_eq!(rec.exec_id, 0);
368        assert_eq!(rec.sql, "DELETE FROM T; ");
369    }
370
371    #[test]
372    fn performance_metrics_rowcount_only() {
373        let raw = build_perf_record("UPDATE T SET A=1; ", "ROWCOUNT: 10(rows)");
374        let rec = parse_record(&raw).unwrap();
375        assert_eq!(rec.exectime, 0.0);
376        assert_eq!(rec.rowcount, 10);
377        assert_eq!(rec.exec_id, 0);
378    }
379
380    #[test]
381    fn performance_metrics_exec_id_only() {
382        let raw = build_perf_record("SELECT 1; ", "EXEC_ID: 42.");
383        let rec = parse_record(&raw).unwrap();
384        assert_eq!(rec.exectime, 0.0);
385        assert_eq!(rec.rowcount, 0);
386        assert_eq!(rec.exec_id, 42);
387    }
388
389    #[test]
390    fn performance_metrics_ora_tag_only_colon_space_sql_empty_after_strip() {
391        let raw = build_perf_record("[ORA] : ", "EXEC_ID: 1.");
392        let rec = parse_record(&raw).unwrap();
393        assert_eq!(rec.tag.as_deref(), Some("ORA"));
394        assert_eq!(rec.sql, "");
395    }
396
397    #[test]
398    fn early_exit_no_dot_suffix() {
399        let raw = build_perf_record("SELECT * FROM users WHERE id = 1;", "");
400        let rec = parse_record(&raw).unwrap();
401        assert_eq!(rec.exectime, 0.0);
402        assert_eq!(rec.rowcount, 0);
403        assert_eq!(rec.exec_id, 0);
404    }
405
406    #[test]
407    fn dot_suffix_no_real_indicators_guarded() {
408        let raw = build_perf_record("SELECT url FROM t WHERE url = 'http://example.com'.", "");
409        let rec = parse_record(&raw).unwrap();
410        assert_eq!(rec.exec_id, 0);
411        assert_eq!(rec.exectime, 0.0);
412    }
413
414    #[test]
415    fn dot_suffix_with_real_indicators() {
416        let raw = build_perf_record(
417            "SELECT 1 FROM T ",
418            "EXECTIME: 2.5(ms) ROWCOUNT: 5(rows) EXEC_ID: 77.",
419        );
420        let rec = parse_record(&raw).unwrap();
421        assert!((rec.exectime - 2.5).abs() < 1e-6);
422        assert_eq!(rec.rowcount, 5);
423        assert_eq!(rec.exec_id, 77);
424        assert_eq!(rec.sql, "SELECT 1 FROM T ");
425    }
426
427    #[test]
428    fn fake_keyword_in_body_plus_real_indicators() {
429        let raw = build_perf_record(
430            "SELECT 'EXECTIME: fake' FROM T ",
431            "EXECTIME: 1.0(ms) ROWCOUNT: 3(rows) EXEC_ID: 55.",
432        );
433        let rec = parse_record(&raw).unwrap();
434        assert!((rec.exectime - 1.0).abs() < 1e-6);
435        assert_eq!(rec.rowcount, 3);
436        assert_eq!(rec.exec_id, 55);
437        assert!(rec.sql.contains("EXECTIME: fake"));
438    }
439
440    #[test]
441    fn multiple_colons_in_body() {
442        let raw = build_perf_record(
443            "SELECT 'http://example.com:8080/path' FROM T ",
444            "EXECTIME: 3.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 99.",
445        );
446        let rec = parse_record(&raw).unwrap();
447        assert!((rec.exectime - 3.0).abs() < 1e-6);
448        assert_eq!(rec.rowcount, 1);
449        assert_eq!(rec.exec_id, 99);
450        assert!(rec.sql.contains("http://example.com:8080/path"));
451    }
452
453    #[test]
454    fn exec_id_only_split_correct() {
455        let raw = build_perf_record("INSERT INTO T VALUES (1); ", "EXEC_ID: 123.");
456        let rec = parse_record(&raw).unwrap();
457        assert_eq!(rec.exec_id, 123);
458        assert_eq!(rec.exectime, 0.0);
459        assert_eq!(rec.rowcount, 0);
460        assert_eq!(rec.sql, "INSERT INTO T VALUES (1); ");
461    }
462
463    // ── 从 tests/sqllog_additional.rs 迁入 ───────────────────────────────
464
465    fn build_additional_record(line1_body: &str, tail: &str) -> Vec<u8> {
466        let header = b"2025-11-17 16:09:41.123 (EP[1] sess:123 thrd:456 user:alice trxid:789 stmt:0x1 appname:bench) ";
467        let mut v = Vec::new();
468        v.extend_from_slice(header);
469        v.extend_from_slice(line1_body.as_bytes());
470        if !tail.is_empty() {
471            v.extend_from_slice(tail.as_bytes());
472        }
473        v
474    }
475
476    #[test]
477    fn body_without_indicators() {
478        let raw = build_additional_record("SELECT 1;", "");
479        let rec = parse_record(&raw).expect("parse ok");
480        assert_eq!(rec.sql, "SELECT 1;");
481        assert_eq!(rec.exec_id, 0);
482        assert_eq!(rec.exectime, 0.0);
483        assert_eq!(rec.rowcount, 0);
484    }
485
486    #[test]
487    fn indicators_exec_id_only() {
488        let raw = build_additional_record("SELECT 1; ", "EXEC_ID: 42.");
489        let rec = parse_record(&raw).unwrap();
490        assert_eq!(rec.sql, "SELECT 1; ");
491        assert_eq!(rec.exec_id, 42);
492    }
493
494    #[test]
495    fn indicators_rowcount_only() {
496        let raw = build_additional_record("UPDATE T SET A=1; ", "ROWCOUNT: 10(rows)");
497        let rec = parse_record(&raw).unwrap();
498        assert_eq!(rec.sql, "UPDATE T SET A=1; ");
499        assert_eq!(rec.rowcount, 10);
500    }
501
502    #[test]
503    fn indicators_exectime_only() {
504        let raw = build_additional_record("DELETE FROM T; ", "EXECTIME: 3.5(ms)");
505        let rec = parse_record(&raw).unwrap();
506        assert_eq!(rec.sql, "DELETE FROM T; ");
507        assert!((rec.exectime - 3.5).abs() < 1e-6);
508    }
509
510    #[test]
511    fn indicators_permutation_all() {
512        let tail = "ROWCOUNT: 5(rows) EXECTIME: 12.25(ms) EXEC_ID: 999.";
513        let raw = build_additional_record("SELECT * FROM T ", tail);
514        let rec = parse_record(&raw).unwrap();
515        assert_eq!(rec.sql, "SELECT * FROM T ");
516        assert_eq!(rec.rowcount, 5);
517        assert!((rec.exectime - 12.25).abs() < 1e-6);
518        assert_eq!(rec.exec_id, 999);
519    }
520
521    #[test]
522    fn meta_parsing_basic() {
523        let raw = b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:SYSDBA trxid:0 stmt:0x2 appname:cli) SELECT";
524        let rec = parse_record(raw).unwrap();
525        assert_eq!(rec.ep, 2);
526        assert_eq!(rec.sess_id, "0xABC");
527        assert_eq!(rec.thrd_id, "777");
528        assert_eq!(rec.username, "SYSDBA");
529        assert_eq!(rec.trxid, "0");
530        assert_eq!(rec.statement, "0x2");
531        assert_eq!(rec.appname, "cli");
532    }
533
534    #[test]
535    fn meta_parsing_empty_appname() {
536        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:) X";
537        let rec = parse_record(raw).unwrap();
538        assert_eq!(rec.appname, "");
539    }
540
541    #[test]
542    fn appname_empty_followed_by_ip_colon_single_should_keep_appname_empty() {
543        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: ip:10.1.1.1) X";
544        let rec = parse_record(raw).unwrap();
545        assert_eq!(rec.appname, "");
546        assert_eq!(rec.client_ip, "10.1.1.1");
547    }
548
549    #[test]
550    fn appname_empty_followed_by_ip_triple_colon_should_keep_appname_empty() {
551        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: ip:::ffff:10.3.100.68) X";
552        let rec = parse_record(raw).unwrap();
553        assert_eq!(rec.appname, "");
554        assert_eq!(rec.client_ip, "::ffff:10.3.100.68");
555    }
556
557    #[test]
558    fn meta_parsing_gb18030_username() {
559        use ::encoding::all::GB18030;
560        use ::encoding::{EncoderTrap, Encoding};
561
562        let username = "用户";
563        let user_bytes = GB18030
564            .encode(username, EncoderTrap::Strict)
565            .expect("encode");
566
567        let mut raw: Vec<u8> = b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
568        raw.extend_from_slice(&user_bytes);
569        raw.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT");
570
571        let rec = parse_record(&raw).unwrap();
572        assert_eq!(rec.username, username);
573    }
574
575    #[test]
576    fn tag_extraction_and_body_trim() {
577        let raw = b"2025-11-17 16:09:41.123 (EP[1] sess:123 thrd:456 user:u trxid:3 stmt:4 appname:bench) [SEL] SELECT 1; EXEC_ID: 42.";
578        let rec = parse_record(raw).unwrap();
579        assert_eq!(rec.tag.as_deref(), Some("SEL"));
580        assert_eq!(rec.sql, "SELECT 1; ");
581    }
582
583    #[test]
584    #[cfg(not(miri))]
585    fn file_encoding_detection_gb18030() {
586        use ::encoding::all::GB18030;
587        use ::encoding::{EncoderTrap, Encoding};
588        use std::io::Write;
589        use tempfile::NamedTempFile;
590
591        let username = "用户";
592        let user_bytes = GB18030
593            .encode(username, EncoderTrap::Strict)
594            .expect("encode");
595
596        let mut line: Vec<u8> =
597            b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
598        line.extend_from_slice(&user_bytes);
599        line.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT\n");
600
601        let mut tmp = NamedTempFile::new().expect("tmp");
602        tmp.write_all(&line).expect("write");
603        tmp.as_file().sync_all().expect("sync");
604
605        let parser = LogParserBuilder::new(tmp.path()).build().expect("open");
606        let rec = parser.iter().next().unwrap().unwrap();
607        assert_eq!(rec.username, username);
608    }
609
610    #[test]
611    fn find_indicators_split_exectime_keyword_in_sql_body_no_indicators() {
612        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT * FROM t WHERE col = 'EXECTIME: slow'\n";
613        let record = parse_record(raw.as_bytes()).unwrap();
614        assert_eq!(record.sql, "SELECT * FROM t WHERE col = 'EXECTIME: slow'\n");
615        assert_eq!(record.exec_id, 0);
616        assert_eq!(record.exectime, 0.0);
617    }
618
619    #[test]
620    fn find_indicators_split_rowcount_keyword_in_sql_body_no_indicators() {
621        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT * FROM t WHERE cnt = 'ROWCOUNT: many'\n";
622        let record = parse_record(raw.as_bytes()).unwrap();
623        assert_eq!(record.sql, "SELECT * FROM t WHERE cnt = 'ROWCOUNT: many'\n");
624        assert_eq!(record.rowcount, 0);
625    }
626
627    #[test]
628    fn find_indicators_split_exec_id_keyword_in_sql_body_no_indicators() {
629        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT EXEC_ID: foo FROM dual\n";
630        let record = parse_record(raw.as_bytes()).unwrap();
631        assert_eq!(record.sql, "SELECT EXEC_ID: foo FROM dual\n");
632        assert_eq!(record.exec_id, 0);
633    }
634
635    #[test]
636    fn find_indicators_split_keyword_in_body_plus_real_indicators() {
637        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT EXECTIME: slow\nEXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 99.\n";
638        let record = parse_record(raw.as_bytes()).unwrap();
639        assert!((record.exectime - 5.0).abs() < 1e-6);
640        assert!(record.sql.contains("SELECT"));
641    }
642
643    #[test]
644    fn find_indicators_split_multiple_keywords_in_body_no_indicators() {
645        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) EXECTIME: x ROWCOUNT: y EXEC_ID: z\n";
646        let record = parse_record(raw.as_bytes()).unwrap();
647        assert_eq!(record.sql, "EXECTIME: x ROWCOUNT: y EXEC_ID: z\n");
648        assert_eq!(record.exec_id, 0);
649        assert_eq!(record.exectime, 0.0);
650    }
651
652    #[test]
653    #[cfg(not(miri))]
654    fn encoding_detection_gb18030_after_64kb_boundary() {
655        use ::encoding::all::GB18030;
656        use ::encoding::{EncoderTrap, Encoding};
657        use std::io::Write;
658        use tempfile::NamedTempFile;
659
660        let ascii_record = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:ascii trxid:0 stmt:0 appname:app) SELECT 1;\n";
661        let repeat_count = 65536 / ascii_record.len() + 2;
662
663        let username = "用户";
664        let user_bytes = GB18030.encode(username, EncoderTrap::Strict).unwrap();
665        let mut gb_line: Vec<u8> = b"2025-11-17 16:09:42.000 (EP[0] sess:2 thrd:2 user:".to_vec();
666        gb_line.extend_from_slice(&user_bytes);
667        gb_line.extend_from_slice(b" trxid:0 stmt:0 appname:app) SELECT 2;\n");
668
669        let mut tmp = NamedTempFile::new().unwrap();
670        for _ in 0..repeat_count {
671            tmp.write_all(ascii_record.as_bytes()).unwrap();
672        }
673        tmp.write_all(&gb_line).unwrap();
674        tmp.as_file().sync_all().unwrap();
675
676        let parser = LogParserBuilder::new(tmp.path()).build().unwrap();
677        let records: Vec<_> = parser.iter().collect();
678        let last = records.last().unwrap().as_ref().unwrap();
679        assert_eq!(last.username, username);
680    }
681
682    #[test]
683    #[cfg(not(miri))]
684    fn file_encoding_detection_utf8() {
685        use std::io::Write;
686        use tempfile::NamedTempFile;
687
688        let username = "用户";
689        let user_bytes = username.as_bytes();
690
691        let mut line: Vec<u8> =
692            b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
693        line.extend_from_slice(user_bytes);
694        line.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT\n");
695
696        let mut tmp = NamedTempFile::new().expect("tmp");
697        tmp.write_all(&line).expect("write");
698        tmp.as_file().sync_all().expect("sync");
699
700        let parser = LogParserBuilder::new(tmp.path()).build().expect("open");
701        let rec = parser.iter().next().unwrap().unwrap();
702        assert_eq!(rec.username, username);
703    }
704
705    // ── 从 tests/parser_coverage.rs 迁入 ─────────────────────────────────
706
    /// parse_record on input with no embedded newline: the newline search finds nothing,
    /// so the whole input is treated as the first (and only) line of the record.
    #[test]
    fn parse_record_single_line_no_newline() {
        let raw =
            b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:U trxid:3 stmt:4 appname:a) SELECT 1";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.ts, "2025-11-17 16:09:41.123");
        assert!(rec.sql.contains("SELECT"));
    }
716
717    /// Record >= 23 bytes with valid timestamp but no `(` → InvalidFormat at meta_start
718    #[test]
719    fn parse_record_no_meta_open_paren() {
720        let raw = b"2025-11-17 16:09:41.123 NO_OPEN_PAREN_AT_ALL_HERE body";
721        let result = parse_record(raw);
722        assert!(result.is_err());
723    }
724
725    /// Record with `(` but no closing `)` → InvalidFormat at meta_end
726    #[test]
727    fn parse_record_no_meta_close_paren() {
728        let raw = b"2025-11-17 16:09:41.123 (UNCLOSED_META body";
729        let result = parse_record(raw);
730        assert!(result.is_err());
731    }
732
733    // ── 从 tests/edge_cases.rs 迁入 ──────────────────────────────────────
734
735    #[test]
736    fn meta_closing_paren_without_space_then_body_on_next_line() {
737        let content = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:app)\nSELECT * FROM T\nEXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 7.\n";
738        let rec = parse_record(content).expect("parse ok");
739        assert!(rec.sql.trim_start().starts_with("SELECT * FROM T"));
740        assert_eq!(rec.exec_id, 7);
741    }
742
743    #[test]
744    fn appname_empty_then_take_next_token_as_appname_not_ip() {
745        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: [SEL] ip:::ffff:10.0.0.1) X";
746        let rec = parse_record(raw).unwrap();
747        assert_eq!(rec.appname, "[SEL]");
748        assert_eq!(rec.client_ip, "::ffff:10.0.0.1");
749    }
750
751    #[test]
752    fn indicators_not_strictly_formatted_should_not_split_body() {
753        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:app) SELECT 1; EXEC_ID:123";
754        let rec = parse_record(raw).unwrap();
755        // EXEC_ID:123 无点号结尾,不会被识别为指标,整段作为 SQL body
756        assert_eq!(rec.exec_id, 0);
757        assert!(rec.sql.ends_with("EXEC_ID:123"));
758    }
759
760    // ── 从 tests/parser_errors.rs 迁入 ───────────────────────────────────
761
762    #[test]
763    fn test_parse_record_timestamp_validation() {
764        use crate::error::ParseError;
765
766        let valid = b"2025-11-17 16:09:41.123 (EP[0]) SELECT";
767        let result = parse_record(valid);
768        assert!(result.is_ok());
769
770        let bad_ts_no_meta = b"2025-11-17 16:09:41.123 INVALID NO META";
771        let result = parse_record(bad_ts_no_meta);
772        assert!(matches!(result, Err(ParseError::InvalidFormat { .. })));
773
774        let short = b"2025-11-17 16:0";
775        let result = parse_record(short);
776        assert!(matches!(result, Err(ParseError::InvalidFormat { .. })));
777    }
778}