Skip to main content

dm_database_parser_sqllog/parser/
mod.rs

1pub mod builder;
2pub(crate) mod encoding;
3pub mod iterator;
4
5pub use builder::LogParserBuilder;
6pub use encoding::FileEncodingHint;
7pub use iterator::LogIterator;
8
9use memchr::memmem::Finder;
10use memchr::{memchr, memrchr};
11use std::fs::File;
12use std::path::PathBuf;
13use std::str;
14use std::sync::LazyLock;
15
16use crate::error::ParseError;
17use crate::record::{self, Sqllog};
18use ::encoding::all::GB18030;
19use ::encoding::{DecoderTrap, Encoding};
20
21/// Pre-built SIMD searcher for the `") "` meta-close pattern.
22static FINDER_CLOSE_META: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b") "));
23
24/// SQL 日志文件解析器。
25///
26/// 通过 [`LogParserBuilder`] 构建实例。每次调用 [`iter`](LogParser::iter) 时
27/// 打开文件并以流式方式逐行读取,内存占用与文件大小无关。
28pub struct LogParser {
29    pub(super) path: PathBuf,
30    pub(super) encoding: FileEncodingHint,
31}
32
33impl LogParser {
34    /// 打开文件并返回流式迭代器。
35    pub fn iter(&self) -> Result<LogIterator, ParseError> {
36        let file = File::open(&self.path).map_err(|e| ParseError::IoError(e.to_string()))?;
37        Ok(LogIterator::new(file, self.encoding))
38    }
39}
40
/// Parses a single SQL log record from raw bytes.
///
/// Test-only convenience wrapper around [`parse_record_with_hint`]: it uses
/// [`FileEncodingHint::Auto`] and reports line number `0` in errors. Multi-line records
/// are detected automatically, so callers pass the complete record as one byte slice.
#[cfg(test)]
pub(crate) fn parse_record(record_bytes: &[u8]) -> Result<Sqllog, ParseError> {
    let encoding_hint = FileEncodingHint::Auto;
    let line_number = 0;
    parse_record_with_hint(record_bytes, encoding_hint, line_number)
}
48
49/// 核心解析函数:从原始字节一次性解析全部字段到 Sqllog。
50pub(super) fn parse_record_with_hint(
51    record_bytes: &[u8],
52    encoding_hint: FileEncodingHint,
53    line_number: u64,
54) -> Result<Sqllog, ParseError> {
55    // 检测是否多行
56    let is_multiline = memchr(b'\n', record_bytes).is_some();
57
58    // 找到第一行
59    let first_line = if is_multiline {
60        match memchr(b'\n', record_bytes) {
61            Some(idx) => {
62                let mut line = &record_bytes[..idx];
63                if line.ends_with(b"\r") {
64                    line = &line[..line.len() - 1];
65                }
66                line
67            }
68            None => {
69                let mut line = record_bytes;
70                if line.ends_with(b"\r") {
71                    line = &line[..line.len() - 1];
72                }
73                line
74            }
75        }
76    } else {
77        let mut line = record_bytes;
78        if line.ends_with(b"\r") {
79            line = &line[..line.len() - 1];
80        }
81        line
82    };
83
84    // ── 1. 时间戳 ──
85    if first_line.len() < 23 {
86        return Err(make_invalid_format_error(first_line, line_number));
87    }
88    let ts = match str::from_utf8(&first_line[0..23]) {
89        Ok(s) => s.to_string(),
90        Err(_) => return Err(make_invalid_format_error(first_line, line_number)),
91    };
92
93    // ── 2. 元数据 ──
94    let meta_start = match memchr(b'(', &first_line[23..]) {
95        Some(idx) => 23 + idx,
96        None => return Err(make_invalid_format_error(first_line, line_number)),
97    };
98
99    let meta_end = match FINDER_CLOSE_META.find(&first_line[meta_start..]) {
100        Some(idx) => Some(meta_start + idx),
101        None => memrchr(b')', &first_line[meta_start..]).map(|idx| meta_start + idx),
102    };
103
104    let meta_end = match meta_end {
105        Some(idx) => idx,
106        None => return Err(make_invalid_format_error(first_line, line_number)),
107    };
108
109    let meta_bytes = &first_line[meta_start + 1..meta_end];
110
111    // 解析元数据(考虑编码)
112    let (ep, sess_id, thrd_id, username, trxid, statement, appname, client_ip) = match encoding_hint
113    {
114        FileEncodingHint::Utf8 => record::parse_meta_from_bytes(meta_bytes),
115        FileEncodingHint::Auto => {
116            // Auto: try UTF-8 first, then GB18030 fallback
117            match str::from_utf8(meta_bytes) {
118                Ok(_) => record::parse_meta_from_bytes(meta_bytes),
119                Err(_) => match GB18030.decode(meta_bytes, DecoderTrap::Strict) {
120                    Ok(decoded) => record::parse_meta_from_bytes(decoded.as_bytes()),
121                    Err(_) => {
122                        let lossy = String::from_utf8_lossy(meta_bytes).into_owned();
123                        record::parse_meta_from_bytes(lossy.as_bytes())
124                    }
125                },
126            }
127        }
128        FileEncodingHint::Gb18030 => match GB18030.decode(meta_bytes, DecoderTrap::Strict) {
129            Ok(decoded) => record::parse_meta_from_bytes(decoded.as_bytes()),
130            Err(_) => {
131                let lossy = String::from_utf8_lossy(meta_bytes).into_owned();
132                record::parse_meta_from_bytes(lossy.as_bytes())
133            }
134        },
135    };
136
137    // ── 3. Body 和 Indicators ──
138    let body_start_in_first_line = meta_end + 1;
139
140    let content_start = if body_start_in_first_line < first_line.len()
141        && first_line[body_start_in_first_line] == b' '
142    {
143        body_start_in_first_line + 1
144    } else {
145        body_start_in_first_line
146    };
147
148    // 提取可选的标签 [SEL] / [ORA]
149    let mut tag: Option<String> = None;
150    let content_slice = if content_start < record_bytes.len() {
151        let mut s = &record_bytes[content_start..];
152        if !s.is_empty()
153            && s[0] == b'['
154            && let Some(end_idx) = memchr(b']', s)
155            && end_idx >= 1
156        {
157            let inner = &s[1..end_idx];
158            if !inner.contains(&b' ') && inner.len() <= 32 {
159                tag = match encoding_hint {
160                    FileEncodingHint::Utf8 => str::from_utf8(inner).ok().map(|t| t.to_string()),
161                    FileEncodingHint::Auto => str::from_utf8(inner)
162                        .ok()
163                        .map(|t| t.to_string())
164                        .or_else(|| GB18030.decode(inner, DecoderTrap::Strict).ok()),
165                    FileEncodingHint::Gb18030 => GB18030
166                        .decode(inner, DecoderTrap::Strict)
167                        .ok()
168                        .or_else(|| str::from_utf8(inner).ok().map(|s| s.to_string())),
169                };
170                // 跳过 ']' 及后续空白
171                s = &s[end_idx + 1..];
172                let mut skip = 0usize;
173                while skip < s.len() && s[skip].is_ascii_whitespace() {
174                    skip += 1;
175                }
176                s = &s[skip..];
177            }
178        }
179        s
180    } else {
181        &[] as &[u8]
182    };
183
184    // 分割 body 和 indicators
185    let split = record::find_indicators_split(content_slice);
186    let body_bytes = &content_slice[..split];
187    let ind_bytes = &content_slice[split..];
188
189    // 解码 body
190    let sql_raw = match encoding_hint {
191        FileEncodingHint::Utf8 => String::from_utf8_lossy(body_bytes).into_owned(),
192        FileEncodingHint::Auto => match str::from_utf8(body_bytes) {
193            Ok(s) => s.to_string(),
194            Err(_) => match GB18030.decode(body_bytes, DecoderTrap::Strict) {
195                Ok(s) => s,
196                Err(_) => String::from_utf8_lossy(body_bytes).into_owned(),
197            },
198        },
199        FileEncodingHint::Gb18030 => match GB18030.decode(body_bytes, DecoderTrap::Strict) {
200            Ok(s) => s,
201            Err(_) => String::from_utf8_lossy(body_bytes).into_owned(),
202        },
203    };
204
205    // 处理 ORA 前缀
206    let sql = if tag.as_deref() == Some("ORA") {
207        sql_raw.strip_prefix(": ").unwrap_or(&sql_raw).to_string()
208    } else {
209        sql_raw
210    };
211
212    // 解析性能指标
213    let (exectime, rowcount, exec_id) = record::parse_indicators_from_bytes(ind_bytes);
214
215    Ok(Sqllog {
216        ts,
217        tag,
218        ep,
219        sess_id,
220        thrd_id,
221        username,
222        trxid,
223        statement,
224        appname,
225        client_ip,
226        sql,
227        exectime,
228        rowcount,
229        exec_id,
230    })
231}
232
233#[cold]
234fn make_invalid_format_error(raw_bytes: &[u8], line_number: u64) -> ParseError {
235    ParseError::InvalidFormat {
236        raw: String::from_utf8_lossy(raw_bytes).to_string(),
237        line_number,
238    }
239}
240
// ── Tests ───────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    /// Timestamp and metadata shared by the records built with [`build_record`].
    const RECORD_HEADER: &[u8] =
        b"2025-11-17 16:09:41.123 (EP[1] sess:123 thrd:456 user:alice trxid:789 stmt:0x1 appname:bench) ";

    /// Builds a single-line record: the shared header, then `body`, then `tail`.
    fn build_record(body: &str, tail: &str) -> Vec<u8> {
        [RECORD_HEADER, body.as_bytes(), tail.as_bytes()].concat()
    }

    /// Encodes `text` as GB18030 bytes.
    fn gb18030_bytes(text: &str) -> Vec<u8> {
        use ::encoding::all::GB18030;
        use ::encoding::{EncoderTrap, Encoding};

        GB18030.encode(text, EncoderTrap::Strict).expect("encode")
    }

    /// Writes `contents` to a fresh temporary file and syncs it to disk.
    #[cfg(not(miri))]
    fn temp_log(contents: &[u8]) -> tempfile::NamedTempFile {
        use std::io::Write;

        let mut tmp = tempfile::NamedTempFile::new().expect("tmp");
        tmp.write_all(contents).expect("write");
        tmp.as_file().sync_all().expect("sync");
        tmp
    }

    #[cfg(not(miri))]
    #[test]
    fn test_builder_encoding_hint_utf8() {
        let tmp = temp_log(
            b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:a) SELECT 1",
        );

        let parser = LogParserBuilder::new(tmp.path())
            .encoding_hint(FileEncodingHint::Utf8)
            .build()
            .expect("build");
        let record = parser.iter().unwrap().next().unwrap().unwrap();
        assert_eq!(record.ts, "2025-11-17 16:09:41.123");
        assert!(record.sql.contains("SELECT 1"));
    }

    #[cfg(not(miri))]
    #[test]
    fn test_builder_file_not_found() {
        let result = LogParserBuilder::new("/nonexistent/path.log").build();
        assert!(
            matches!(result, Err(ParseError::IoError(_))),
            "Expected IoError on nonexistent file"
        );
    }

    // ── Migrated from tests/performance_metrics.rs ───────────────────────

    #[test]
    fn performance_metrics_full() {
        let raw = build_record(
            "SELECT * FROM T ",
            "EXECTIME: 10.5(ms) ROWCOUNT: 100(rows) EXEC_ID: 999.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 10.5).abs() < 1e-6);
        assert_eq!(rec.rowcount, 100);
        assert_eq!(rec.exec_id, 999);
        assert_eq!(rec.sql, "SELECT * FROM T ");
    }

    #[test]
    fn performance_metrics_no_indicators() {
        let raw = build_record("SELECT 1;", "");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.sql, "SELECT 1;");
    }

    #[test]
    fn performance_metrics_ora_tag_strips_colon_space_prefix() {
        let raw = build_record(
            "[ORA] : SELECT 1 FROM DUAL ",
            "EXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 42.",
        );
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("ORA"));
        assert_eq!(rec.sql, "SELECT 1 FROM DUAL ");
        assert!((rec.exectime - 5.0).abs() < 1e-6);
        assert_eq!(rec.rowcount, 1);
        assert_eq!(rec.exec_id, 42);
    }

    #[test]
    fn performance_metrics_ora_tag_no_prefix_unchanged() {
        let raw = build_record(
            "[ORA] SELECT 1 FROM DUAL ",
            "EXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 42.",
        );
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("ORA"));
        assert_eq!(rec.sql, "SELECT 1 FROM DUAL ");
    }

    #[test]
    fn performance_metrics_non_ora_tag_keeps_prefix_intact() {
        let raw = build_record("[SEL] : SELECT 1 ", "EXEC_ID: 7.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("SEL"));
        assert_eq!(rec.sql, ": SELECT 1 ");
    }

    #[test]
    fn performance_metrics_no_tag_keeps_prefix_intact() {
        let raw = build_record(": SELECT 1 ", "EXEC_ID: 7.");
        let rec = parse_record(&raw).unwrap();
        assert!(rec.tag.is_none());
        assert_eq!(rec.sql, ": SELECT 1 ");
    }

    #[test]
    fn performance_metrics_exectime_only() {
        let raw = build_record("DELETE FROM T; ", "EXECTIME: 3.5(ms)");
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 3.5).abs() < 1e-6);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.sql, "DELETE FROM T; ");
    }

    #[test]
    fn performance_metrics_rowcount_only() {
        let raw = build_record("UPDATE T SET A=1; ", "ROWCOUNT: 10(rows)");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 10);
        assert_eq!(rec.exec_id, 0);
    }

    #[test]
    fn performance_metrics_exec_id_only() {
        let raw = build_record("SELECT 1; ", "EXEC_ID: 42.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 42);
    }

    #[test]
    fn performance_metrics_ora_tag_only_colon_space_sql_empty_after_strip() {
        let raw = build_record("[ORA] : ", "EXEC_ID: 1.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("ORA"));
        assert_eq!(rec.sql, "");
    }

    #[test]
    fn early_exit_no_dot_suffix() {
        let raw = build_record("SELECT * FROM users WHERE id = 1;", "");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.exec_id, 0);
    }

    #[test]
    fn dot_suffix_no_real_indicators_guarded() {
        let raw = build_record("SELECT url FROM t WHERE url = 'http://example.com'.", "");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.exectime, 0.0);
    }

    #[test]
    fn dot_suffix_with_real_indicators() {
        let raw = build_record(
            "SELECT 1 FROM T ",
            "EXECTIME: 2.5(ms) ROWCOUNT: 5(rows) EXEC_ID: 77.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 2.5).abs() < 1e-6);
        assert_eq!(rec.rowcount, 5);
        assert_eq!(rec.exec_id, 77);
        assert_eq!(rec.sql, "SELECT 1 FROM T ");
    }

    #[test]
    fn fake_keyword_in_body_plus_real_indicators() {
        let raw = build_record(
            "SELECT 'EXECTIME: fake' FROM T ",
            "EXECTIME: 1.0(ms) ROWCOUNT: 3(rows) EXEC_ID: 55.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 1.0).abs() < 1e-6);
        assert_eq!(rec.rowcount, 3);
        assert_eq!(rec.exec_id, 55);
        assert!(rec.sql.contains("EXECTIME: fake"));
    }

    #[test]
    fn multiple_colons_in_body() {
        let raw = build_record(
            "SELECT 'http://example.com:8080/path' FROM T ",
            "EXECTIME: 3.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 99.",
        );
        let rec = parse_record(&raw).unwrap();
        assert!((rec.exectime - 3.0).abs() < 1e-6);
        assert_eq!(rec.rowcount, 1);
        assert_eq!(rec.exec_id, 99);
        assert!(rec.sql.contains("http://example.com:8080/path"));
    }

    #[test]
    fn exec_id_only_split_correct() {
        let raw = build_record("INSERT INTO T VALUES (1); ", "EXEC_ID: 123.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.exec_id, 123);
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
        assert_eq!(rec.sql, "INSERT INTO T VALUES (1); ");
    }

    // ── Migrated from tests/sqllog_additional.rs ─────────────────────────

    #[test]
    fn body_without_indicators() {
        let raw = build_record("SELECT 1;", "");
        let rec = parse_record(&raw).expect("parse ok");
        assert_eq!(rec.sql, "SELECT 1;");
        assert_eq!(rec.exec_id, 0);
        assert_eq!(rec.exectime, 0.0);
        assert_eq!(rec.rowcount, 0);
    }

    #[test]
    fn indicators_exec_id_only() {
        let raw = build_record("SELECT 1; ", "EXEC_ID: 42.");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "SELECT 1; ");
        assert_eq!(rec.exec_id, 42);
    }

    #[test]
    fn indicators_rowcount_only() {
        let raw = build_record("UPDATE T SET A=1; ", "ROWCOUNT: 10(rows)");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "UPDATE T SET A=1; ");
        assert_eq!(rec.rowcount, 10);
    }

    #[test]
    fn indicators_exectime_only() {
        let raw = build_record("DELETE FROM T; ", "EXECTIME: 3.5(ms)");
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "DELETE FROM T; ");
        assert!((rec.exectime - 3.5).abs() < 1e-6);
    }

    #[test]
    fn indicators_permutation_all() {
        let tail = "ROWCOUNT: 5(rows) EXECTIME: 12.25(ms) EXEC_ID: 999.";
        let raw = build_record("SELECT * FROM T ", tail);
        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.sql, "SELECT * FROM T ");
        assert_eq!(rec.rowcount, 5);
        assert!((rec.exectime - 12.25).abs() < 1e-6);
        assert_eq!(rec.exec_id, 999);
    }

    #[test]
    fn meta_parsing_basic() {
        let raw = b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:SYSDBA trxid:0 stmt:0x2 appname:cli) SELECT";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.ep, 2);
        assert_eq!(rec.sess_id, "0xABC");
        assert_eq!(rec.thrd_id, "777");
        assert_eq!(rec.username, "SYSDBA");
        assert_eq!(rec.trxid, "0");
        assert_eq!(rec.statement, "0x2");
        assert_eq!(rec.appname, "cli");
    }

    #[test]
    fn meta_parsing_empty_appname() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "");
    }

    #[test]
    fn appname_empty_followed_by_ip_colon_single_should_keep_appname_empty() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: ip:10.1.1.1) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "");
        assert_eq!(rec.client_ip, "10.1.1.1");
    }

    #[test]
    fn appname_empty_followed_by_ip_triple_colon_should_keep_appname_empty() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: ip:::ffff:10.3.100.68) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "");
        assert_eq!(rec.client_ip, "::ffff:10.3.100.68");
    }

    #[test]
    fn meta_parsing_gb18030_username() {
        let username = "用户";

        let mut raw: Vec<u8> = b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
        raw.extend_from_slice(&gb18030_bytes(username));
        raw.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT");

        let rec = parse_record(&raw).unwrap();
        assert_eq!(rec.username, username);
    }

    #[test]
    fn tag_extraction_and_body_trim() {
        let raw = b"2025-11-17 16:09:41.123 (EP[1] sess:123 thrd:456 user:u trxid:3 stmt:4 appname:bench) [SEL] SELECT 1; EXEC_ID: 42.";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.tag.as_deref(), Some("SEL"));
        assert_eq!(rec.sql, "SELECT 1; ");
    }

    #[test]
    #[cfg(not(miri))]
    fn file_encoding_detection_gb18030() {
        let username = "用户";

        let mut line: Vec<u8> =
            b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
        line.extend_from_slice(&gb18030_bytes(username));
        line.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT\n");

        let tmp = temp_log(&line);

        let parser = LogParserBuilder::new(tmp.path()).build().expect("open");
        let rec = parser.iter().unwrap().next().unwrap().unwrap();
        assert_eq!(rec.username, username);
    }

    #[test]
    fn find_indicators_split_exectime_keyword_in_sql_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT * FROM t WHERE col = 'EXECTIME: slow'\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "SELECT * FROM t WHERE col = 'EXECTIME: slow'\n");
        assert_eq!(record.exec_id, 0);
        assert_eq!(record.exectime, 0.0);
    }

    #[test]
    fn find_indicators_split_rowcount_keyword_in_sql_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT * FROM t WHERE cnt = 'ROWCOUNT: many'\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "SELECT * FROM t WHERE cnt = 'ROWCOUNT: many'\n");
        assert_eq!(record.rowcount, 0);
    }

    #[test]
    fn find_indicators_split_exec_id_keyword_in_sql_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT EXEC_ID: foo FROM dual\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "SELECT EXEC_ID: foo FROM dual\n");
        assert_eq!(record.exec_id, 0);
    }

    #[test]
    fn find_indicators_split_keyword_in_body_plus_real_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) SELECT EXECTIME: slow\nEXECTIME: 5.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 99.\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert!((record.exectime - 5.0).abs() < 1e-6);
        assert!(record.sql.contains("SELECT"));
    }

    #[test]
    fn find_indicators_split_multiple_keywords_in_body_no_indicators() {
        let raw = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:0 stmt:0 appname:a) EXECTIME: x ROWCOUNT: y EXEC_ID: z\n";
        let record = parse_record(raw.as_bytes()).unwrap();
        assert_eq!(record.sql, "EXECTIME: x ROWCOUNT: y EXEC_ID: z\n");
        assert_eq!(record.exec_id, 0);
        assert_eq!(record.exectime, 0.0);
    }

    /// The only GB18030 record sits after more than 64 KiB of pure-ASCII records.
    #[test]
    #[cfg(not(miri))]
    fn encoding_detection_gb18030_after_64kb_boundary() {
        let ascii_record = "2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:ascii trxid:0 stmt:0 appname:app) SELECT 1;\n";
        let repeat_count = 65536 / ascii_record.len() + 2;

        let username = "用户";
        let mut contents = ascii_record.as_bytes().repeat(repeat_count);
        contents.extend_from_slice(b"2025-11-17 16:09:42.000 (EP[0] sess:2 thrd:2 user:");
        contents.extend_from_slice(&gb18030_bytes(username));
        contents.extend_from_slice(b" trxid:0 stmt:0 appname:app) SELECT 2;\n");

        let tmp = temp_log(&contents);

        let parser = LogParserBuilder::new(tmp.path()).build().unwrap();
        let records: Vec<_> = parser.iter().unwrap().collect();
        let last = records.last().unwrap().as_ref().unwrap();
        assert_eq!(last.username, username);
    }

    #[test]
    #[cfg(not(miri))]
    fn file_encoding_detection_utf8() {
        let username = "用户";

        let mut line: Vec<u8> =
            b"2025-11-17 16:09:41.123 (EP[2] sess:0xABC thrd:777 user:".to_vec();
        line.extend_from_slice(username.as_bytes());
        line.extend_from_slice(b" trxid:0 stmt:0x2 appname:cli) SELECT\n");

        let tmp = temp_log(&line);

        let parser = LogParserBuilder::new(tmp.path()).build().expect("open");
        let rec = parser.iter().unwrap().next().unwrap().unwrap();
        assert_eq!(rec.username, username);
    }

    // ── Migrated from tests/parser_coverage.rs ───────────────────────────

    /// A record without any `\n` is parsed as a single line: the whole input is the first line.
    #[test]
    fn parse_record_single_line_no_newline() {
        let raw =
            b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:U trxid:3 stmt:4 appname:a) SELECT 1";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.ts, "2025-11-17 16:09:41.123");
        assert!(rec.sql.contains("SELECT"));
    }

    /// Record >= 23 bytes with a valid timestamp but no `(` → error while locating the metadata start.
    #[test]
    fn parse_record_no_meta_open_paren() {
        let raw = b"2025-11-17 16:09:41.123 NO_OPEN_PAREN_AT_ALL_HERE body";
        let result = parse_record(raw);
        assert!(result.is_err());
    }

    /// Record with `(` but no closing `)` → error while locating the metadata end.
    #[test]
    fn parse_record_no_meta_close_paren() {
        let raw = b"2025-11-17 16:09:41.123 (UNCLOSED_META body";
        let result = parse_record(raw);
        assert!(result.is_err());
    }

    // ── Migrated from tests/edge_cases.rs ────────────────────────────────

    #[test]
    fn meta_closing_paren_without_space_then_body_on_next_line() {
        let content = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:app)\nSELECT * FROM T\nEXECTIME: 0(ms) ROWCOUNT: 1(rows) EXEC_ID: 7.\n";
        let rec = parse_record(content).expect("parse ok");
        assert!(rec.sql.trim_start().starts_with("SELECT * FROM T"));
        assert_eq!(rec.exec_id, 7);
    }

    #[test]
    fn appname_empty_then_take_next_token_as_appname_not_ip() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname: [SEL] ip:::ffff:10.0.0.1) X";
        let rec = parse_record(raw).unwrap();
        assert_eq!(rec.appname, "[SEL]");
        assert_eq!(rec.client_ip, "::ffff:10.0.0.1");
    }

    #[test]
    fn indicators_not_strictly_formatted_should_not_split_body() {
        let raw = b"2025-11-17 16:09:41.123 (EP[0] sess:1 thrd:2 user:u trxid:3 stmt:4 appname:app) SELECT 1; EXEC_ID:123";
        let rec = parse_record(raw).unwrap();
        // `EXEC_ID:123` has no trailing dot, so it is not recognized as an indicator and
        // the whole remainder stays in the SQL body.
        assert_eq!(rec.exec_id, 0);
        assert!(rec.sql.ends_with("EXEC_ID:123"));
    }

    // ── Migrated from tests/parser_errors.rs ─────────────────────────────

    #[test]
    fn test_parse_record_timestamp_validation() {
        let valid = b"2025-11-17 16:09:41.123 (EP[0]) SELECT";
        assert!(parse_record(valid).is_ok());

        // The timestamp itself is well-formed here; the record is rejected because no
        // metadata section follows it.
        let valid_ts_no_meta = b"2025-11-17 16:09:41.123 INVALID NO META";
        let result = parse_record(valid_ts_no_meta);
        assert!(matches!(result, Err(ParseError::InvalidFormat { .. })));

        // Shorter than the 23-byte timestamp.
        let short = b"2025-11-17 16:0";
        let result = parse_record(short);
        assert!(matches!(result, Err(ParseError::InvalidFormat { .. })));
    }
}