Skip to main content

dm_database_parser_sqllog/
sqllog.rs

1use atoi::atoi;
2use encoding::DecoderTrap;
3use encoding::Encoding;
4use encoding::all::GB18030;
5use memchr::{memchr, memrchr};
6use simdutf8::basic::from_utf8 as simd_from_utf8;
7use std::borrow::Cow;
8
/// A single SQL log record.
///
/// Represents one complete SQL log record: a timestamp, metadata, the SQL statement body and
/// optional performance indicators.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Sqllog<'a> {
    /// Timestamp, formatted as "YYYY-MM-DD HH:MM:SS.mmm".
    pub ts: Cow<'a, str>,

    /// Raw metadata text (parsed lazily, see `parse_meta`).
    pub meta_raw: Cow<'a, str>,

    /// Raw content (body followed by indicators); split and parsed lazily.
    pub content_raw: Cow<'a, [u8]>,

    /// Extracted square-bracket tag (for example `[SEL]` or `[ORA]`); `None` when absent.
    pub tag: Option<Cow<'a, str>>,

    /// File-level encoding hint (detected by the parser), used to decode the content correctly.
    pub encoding: crate::parser::FileEncodingHint,
}
31
32impl<'a> Sqllog<'a> {
33    /// 获取 SQL 语句体(延迟分割)
34    pub fn body(&self) -> Cow<'a, str> {
35        let split = self.find_indicators_split();
36        let body_bytes = &self.content_raw[..split];
37
38        match self.encoding {
39            crate::parser::FileEncodingHint::Utf8 | crate::parser::FileEncodingHint::Auto => {
40                match simd_from_utf8(body_bytes) {
41                    Ok(s) => match &self.content_raw {
42                        Cow::Borrowed(_) => unsafe {
43                            let ptr = body_bytes.as_ptr();
44                            let len = body_bytes.len();
45                            let slice = std::slice::from_raw_parts(ptr, len);
46                            Cow::Borrowed(std::str::from_utf8_unchecked(slice))
47                        },
48                        Cow::Owned(_) => Cow::Owned(s.to_string()),
49                    },
50                    Err(_) => Cow::Owned(String::from_utf8_lossy(body_bytes).into_owned()),
51                }
52            }
53            crate::parser::FileEncodingHint::Gb18030 => {
54                // Decode using GB18030 and return owned string
55                match GB18030.decode(body_bytes, DecoderTrap::Strict) {
56                    Ok(s) => Cow::Owned(s),
57                    Err(_) => Cow::Owned(String::from_utf8_lossy(body_bytes).into_owned()),
58                }
59            }
60        }
61    }
62
    /// Returns the length in bytes of the SQL statement body.
    ///
    /// Performs no UTF-8 validation and no allocation; the value is the offset at which the
    /// indicators section starts inside `content_raw`.
    #[inline]
    pub fn body_len(&self) -> usize {
        self.find_indicators_split()
    }
68
69    /// 获取 SQL 语句体的原始字节切片(不分配)
70    #[inline]
71    pub fn body_bytes(&self) -> &[u8] {
72        let split = self.find_indicators_split();
73        &self.content_raw[..split]
74    }
75
76    /// 获取原始性能指标字符串(延迟分割)
77    pub fn indicators_raw(&self) -> Option<Cow<'a, str>> {
78        let split = self.find_indicators_split();
79        let indicators_bytes = &self.content_raw[split..];
80        if indicators_bytes.is_empty() {
81            return None;
82        }
83
84        match self.encoding {
85            crate::parser::FileEncodingHint::Utf8 | crate::parser::FileEncodingHint::Auto => {
86                match &self.content_raw {
87                    Cow::Borrowed(_) => unsafe {
88                        let ptr = indicators_bytes.as_ptr();
89                        let len = indicators_bytes.len();
90                        let slice = std::slice::from_raw_parts(ptr, len);
91                        Some(Cow::Borrowed(std::str::from_utf8_unchecked(slice)))
92                    },
93                    Cow::Owned(_) => unsafe {
94                        Some(Cow::Owned(
95                            std::str::from_utf8_unchecked(indicators_bytes).to_string(),
96                        ))
97                    },
98                }
99            }
100            crate::parser::FileEncodingHint::Gb18030 => {
101                match GB18030.decode(indicators_bytes, DecoderTrap::Strict) {
102                    Ok(s) => Some(Cow::Owned(s)),
103                    Err(_) => Some(Cow::Owned(
104                        String::from_utf8_lossy(indicators_bytes).into_owned(),
105                    )),
106                }
107            }
108        }
109    }
110
111    fn find_indicators_split(&self) -> usize {
112        let body = &self.content_raw;
113        let current_len = body.len();
114        let search_limit = 256;
115        let start_search = current_len.saturating_sub(search_limit);
116        let search_slice = &body[start_search..current_len];
117
118        let mut tail_len = search_slice.len();
119
120        // 1. EXEC_ID
121        let mut search_end = tail_len;
122        while let Some(idx) = memrchr(b':', &search_slice[..search_end]) {
123            if idx >= 7
124                && &search_slice[idx - 7..idx] == b"EXEC_ID"
125                && idx + 1 < search_slice.len()
126                && search_slice[idx + 1] == b' '
127            {
128                tail_len = idx - 7;
129                break;
130            }
131            if idx == 0 {
132                break;
133            }
134            search_end = idx;
135        }
136
137        // 2. ROWCOUNT
138        let slice_view = &search_slice[..tail_len];
139        search_end = slice_view.len();
140        while let Some(idx) = memrchr(b':', &slice_view[..search_end]) {
141            if idx >= 8
142                && &search_slice[idx - 8..idx] == b"ROWCOUNT"
143                && idx + 1 < search_slice.len()
144                && search_slice[idx + 1] == b' '
145            {
146                tail_len = idx - 8;
147                break;
148            }
149            if idx == 0 {
150                break;
151            }
152            search_end = idx;
153        }
154
155        // 3. EXECTIME
156        let slice_view = &search_slice[..tail_len];
157        search_end = slice_view.len();
158        while let Some(idx) = memrchr(b':', &slice_view[..search_end]) {
159            if idx >= 8
160                && &search_slice[idx - 8..idx] == b"EXECTIME"
161                && idx + 1 < search_slice.len()
162                && search_slice[idx + 1] == b' '
163            {
164                tail_len = idx - 8;
165                break;
166            }
167            if idx == 0 {
168                break;
169            }
170            search_end = idx;
171        }
172
173        start_search + tail_len
174    }
175
176    /// 解析性能指标
177    pub fn parse_indicators(&self) -> Option<IndicatorsParts> {
178        let split = self.find_indicators_split();
179        let indicators_bytes = &self.content_raw[split..];
180        if indicators_bytes.is_empty() {
181            return None;
182        }
183
184        let mut indicators = IndicatorsParts::default();
185        let mut has_indicators = false;
186
187        // Parse EXECTIME
188        if let Some(idx) = memchr::memmem::find(indicators_bytes, b"EXECTIME:") {
189            // Find '(' after EXECTIME:
190            let search_start = idx + 9;
191            if let Some(paren_idx) = memchr(b'(', &indicators_bytes[search_start..]) {
192                let val_bytes = &indicators_bytes[search_start..search_start + paren_idx];
193                // Trim manually for speed
194                let mut start = 0;
195                let mut end = val_bytes.len();
196                while start < end && val_bytes[start] == b' ' {
197                    start += 1;
198                }
199                while end > start && val_bytes[end - 1] == b' ' {
200                    end -= 1;
201                }
202                if start < end {
203                    let s = unsafe { std::str::from_utf8_unchecked(&val_bytes[start..end]) };
204                    if let Ok(time) = s.parse::<f32>() {
205                        indicators.execute_time = time;
206                        has_indicators = true;
207                    }
208                }
209            }
210        }
211
212        // Parse ROWCOUNT
213        if let Some(idx) = memchr::memmem::find(indicators_bytes, b"ROWCOUNT:") {
214            let search_start = idx + 9;
215            if let Some(paren_idx) = memchr(b'(', &indicators_bytes[search_start..]) {
216                let val_bytes = &indicators_bytes[search_start..search_start + paren_idx];
217                let mut start = 0;
218                let mut end = val_bytes.len();
219                while start < end && val_bytes[start] == b' ' {
220                    start += 1;
221                }
222                while end > start && val_bytes[end - 1] == b' ' {
223                    end -= 1;
224                }
225                if start < end
226                    && let Some(count) = atoi::<u32>(&val_bytes[start..end])
227                {
228                    indicators.row_count = count;
229                    has_indicators = true;
230                }
231            }
232        }
233
234        // Parse EXEC_ID
235        if let Some(idx) = memchr::memmem::find(indicators_bytes, b"EXEC_ID:") {
236            let search_start = idx + 8;
237            let end = memchr(b'.', &indicators_bytes[search_start..])
238                .map(|i| search_start + i)
239                .unwrap_or(indicators_bytes.len());
240            let val_bytes = &indicators_bytes[search_start..end];
241            let mut start = 0;
242            let mut trimmed_end = val_bytes.len();
243            while start < trimmed_end && val_bytes[start] == b' ' {
244                start += 1;
245            }
246            while trimmed_end > start && val_bytes[trimmed_end - 1] == b' ' {
247                trimmed_end -= 1;
248            }
249            if start < trimmed_end
250                && let Some(id) = atoi::<i64>(&val_bytes[start..trimmed_end])
251            {
252                indicators.execute_id = id;
253                has_indicators = true;
254            }
255        }
256
257        if has_indicators {
258            Some(indicators)
259        } else {
260            None
261        }
262    }
263
264    /// 解析元数据
265    pub fn parse_meta(&self) -> MetaParts<'a> {
266        let meta_bytes = self.meta_raw.as_bytes();
267        let mut meta = MetaParts::default();
268        let len = meta_bytes.len();
269
270        // Determine if we're working with borrowed or owned data once
271        let is_borrowed = matches!(&self.meta_raw, Cow::Borrowed(_));
272
273        // Fast path: single pass through meta_bytes with manual tokenization
274        let mut idx = 0;
275        while idx < len {
276            // Skip whitespace
277            while idx < len && meta_bytes[idx] == b' ' {
278                idx += 1;
279            }
280            if idx >= len {
281                break;
282            }
283
284            // Find token end
285            let start = idx;
286            while idx < len && meta_bytes[idx] != b' ' {
287                idx += 1;
288            }
289            let part = &meta_bytes[start..idx];
290
291            // Parse EP[n]
292            if part.len() > 4
293                && part[0] == b'E'
294                && part[1] == b'P'
295                && part[2] == b'['
296                && part[part.len() - 1] == b']'
297            {
298                if let Some(ep) = atoi::<u8>(&part[3..part.len() - 1]) {
299                    meta.ep = ep;
300                }
301                continue;
302            }
303
304            // Find ':'
305            if let Some(sep) = memchr(b':', part) {
306                let key = &part[..sep];
307                let val = &part[sep + 1..];
308
309                // Fast conversion: no validation, direct unsafe cast
310                let to_cow = |bytes: &[u8]| -> Cow<'a, str> {
311                    if is_borrowed {
312                        unsafe {
313                            Cow::Borrowed(std::str::from_utf8_unchecked(
314                                std::slice::from_raw_parts(bytes.as_ptr(), bytes.len()),
315                            ))
316                        }
317                    } else {
318                        unsafe { Cow::Owned(std::str::from_utf8_unchecked(bytes).to_string()) }
319                    }
320                };
321
322                match key {
323                    b"sess" => meta.sess_id = to_cow(val),
324                    b"thrd" => meta.thrd_id = to_cow(val),
325                    b"user" => meta.username = to_cow(val),
326                    b"trxid" => meta.trxid = to_cow(val),
327                    b"stmt" => meta.statement = to_cow(val),
328                    b"ip" => meta.client_ip = to_cow(val),
329                    b"appname" => {
330                        if !val.is_empty() {
331                            meta.appname = to_cow(val);
332                        } else {
333                            // Peek next token
334                            let mut peek_idx = idx;
335                            while peek_idx < len && meta_bytes[peek_idx] == b' ' {
336                                peek_idx += 1;
337                            }
338                            if peek_idx < len {
339                                let peek_start = peek_idx;
340                                while peek_idx < len && meta_bytes[peek_idx] != b' ' {
341                                    peek_idx += 1;
342                                }
343                                let next_part = &meta_bytes[peek_start..peek_idx];
344                                // If the next token is an ip (single or double/triple colon forms), do NOT treat it as appname
345                                if !(next_part.starts_with(b"ip:")
346                                    || next_part.starts_with(b"ip::"))
347                                {
348                                    meta.appname = to_cow(next_part);
349                                    idx = peek_idx;
350                                }
351                            }
352                        }
353                    }
354                    _ => {}
355                }
356            }
357        }
358        meta
359    }
360}
361
/// Metadata fields of a log record.
///
/// Holds every metadata field of a log record, such as the session ID and the user name.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MetaParts<'a> {
    /// EP (Execution Point) number, range 0-255.
    pub ep: u8,

    /// Session ID.
    pub sess_id: Cow<'a, str>,

    /// Thread ID.
    pub thrd_id: Cow<'a, str>,

    /// User name.
    pub username: Cow<'a, str>,

    /// Transaction ID.
    pub trxid: Cow<'a, str>,

    /// Statement ID.
    pub statement: Cow<'a, str>,

    /// Application name.
    pub appname: Cow<'a, str>,

    /// Client IP address (optional).
    pub client_ip: Cow<'a, str>,
}
391
/// Performance indicators of a log record.
///
/// Holds the performance indicators of an SQL execution, such as the execution time and the
/// number of affected rows.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct IndicatorsParts {
    /// Execution time (milliseconds).
    pub execute_time: f32,

    /// Number of affected rows.
    pub row_count: u32,

    /// Execution ID.
    pub execute_id: i64,
}