dm_database_parser_sqllog/
sqllog.rs

1use atoi::atoi;
2use memchr::{memchr, memrchr};
3use simdutf8::basic::from_utf8 as simd_from_utf8;
4use std::borrow::Cow;
5
/// A single SQL log record.
///
/// Holds the timestamp, the raw metadata text, and the raw content (the SQL statement
/// body followed by optional performance indicators). Metadata and content are kept
/// unparsed and are split/parsed on demand by the methods of this type.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Sqllog<'a> {
    /// Timestamp, formatted as "YYYY-MM-DD HH:MM:SS.mmm".
    pub ts: Cow<'a, str>,

    /// Raw metadata text (parsed lazily).
    pub meta_raw: Cow<'a, str>,

    /// Raw content bytes (statement body plus indicators), split and parsed lazily.
    pub content_raw: Cow<'a, [u8]>,
}
22
23impl<'a> Sqllog<'a> {
24    /// 获取 SQL 语句体(延迟分割)
25    pub fn body(&self) -> Cow<'a, str> {
26        let split = self.find_indicators_split();
27        let body_bytes = &self.content_raw[..split];
28        match simd_from_utf8(body_bytes) {
29            Ok(s) => match &self.content_raw {
30                Cow::Borrowed(_) => unsafe {
31                    let ptr = body_bytes.as_ptr();
32                    let len = body_bytes.len();
33                    let slice = std::slice::from_raw_parts(ptr, len);
34                    Cow::Borrowed(std::str::from_utf8_unchecked(slice))
35                },
36                Cow::Owned(_) => Cow::Owned(s.to_string()),
37            },
38            Err(_) => Cow::Owned(String::from_utf8_lossy(body_bytes).into_owned()),
39        }
40    }
41
42    /// 获取原始性能指标字符串(延迟分割)
43    pub fn indicators_raw(&self) -> Option<Cow<'a, str>> {
44        let split = self.find_indicators_split();
45        let indicators_bytes = &self.content_raw[split..];
46        if indicators_bytes.is_empty() {
47            return None;
48        }
49        match &self.content_raw {
50            Cow::Borrowed(_) => unsafe {
51                let ptr = indicators_bytes.as_ptr();
52                let len = indicators_bytes.len();
53                let slice = std::slice::from_raw_parts(ptr, len);
54                Some(Cow::Borrowed(std::str::from_utf8_unchecked(slice)))
55            },
56            Cow::Owned(_) => unsafe {
57                Some(Cow::Owned(
58                    std::str::from_utf8_unchecked(indicators_bytes).to_string(),
59                ))
60            },
61        }
62    }
63
64    fn find_indicators_split(&self) -> usize {
65        let body = &self.content_raw;
66        let current_len = body.len();
67        let search_limit = 256;
68        let start_search = current_len.saturating_sub(search_limit);
69        let search_slice = &body[start_search..current_len];
70
71        let mut tail_len = search_slice.len();
72
73        // 1. EXEC_ID
74        let mut search_end = tail_len;
75        while let Some(idx) = memrchr(b':', &search_slice[..search_end]) {
76            if idx >= 7
77                && &search_slice[idx - 7..idx] == b"EXEC_ID"
78                && idx + 1 < search_slice.len()
79                && search_slice[idx + 1] == b' '
80            {
81                tail_len = idx - 7;
82                break;
83            }
84            if idx == 0 {
85                break;
86            }
87            search_end = idx;
88        }
89
90        // 2. ROWCOUNT
91        let slice_view = &search_slice[..tail_len];
92        search_end = slice_view.len();
93        while let Some(idx) = memrchr(b':', &slice_view[..search_end]) {
94            if idx >= 8
95                && &search_slice[idx - 8..idx] == b"ROWCOUNT"
96                && idx + 1 < search_slice.len()
97                && search_slice[idx + 1] == b' '
98            {
99                tail_len = idx - 8;
100                break;
101            }
102            if idx == 0 {
103                break;
104            }
105            search_end = idx;
106        }
107
108        // 3. EXECTIME
109        let slice_view = &search_slice[..tail_len];
110        search_end = slice_view.len();
111        while let Some(idx) = memrchr(b':', &slice_view[..search_end]) {
112            if idx >= 8
113                && &search_slice[idx - 8..idx] == b"EXECTIME"
114                && idx + 1 < search_slice.len()
115                && search_slice[idx + 1] == b' '
116            {
117                tail_len = idx - 8;
118                break;
119            }
120            if idx == 0 {
121                break;
122            }
123            search_end = idx;
124        }
125
126        start_search + tail_len
127    }
128
129    /// 解析性能指标
130    pub fn parse_indicators(&self) -> Option<IndicatorsParts> {
131        let raw_cow = self.indicators_raw()?;
132        let raw = raw_cow.as_ref();
133        let bytes = raw.as_bytes();
134
135        // We need to parse the indicators from the raw string.
136        // The format is "EXECTIME: ... ROWCOUNT: ... EXEC_ID: ..."
137        // But the order might vary or some might be missing?
138        // The parser logic in parser.rs handled this by searching backwards.
139        // We should duplicate that logic here or move it here.
140        // Since we want to keep parser.rs focused on splitting, let's implement parsing here.
141
142        let mut indicators = IndicatorsParts::default();
143        let mut has_indicators = false;
144
145        // Helper to trim
146        fn trim(b: &[u8]) -> &[u8] {
147            let start = b
148                .iter()
149                .position(|&x| !x.is_ascii_whitespace())
150                .unwrap_or(0);
151            let end = b
152                .iter()
153                .rposition(|&x| !x.is_ascii_whitespace())
154                .map(|i| i + 1)
155                .unwrap_or(start);
156            &b[start..end]
157        }
158
159        // We can use a simple forward scan or regex-like search since we have the isolated string.
160        // "EXECTIME: 1.0(ms) ROWCOUNT: 1(rows) EXEC_ID: 100."
161
162        // Parse EXECTIME
163        if let Some(idx) = memchr::memmem::find(bytes, b"EXECTIME:")
164            && let Some(end) = memchr(b'(', &bytes[idx..])
165        {
166            let val_bytes = &bytes[idx + 9..idx + end]; // 9 is len of "EXECTIME:"
167            let val_trimmed = trim(val_bytes);
168            // unsafe is fine as we trust the source from parser
169            let s = unsafe { std::str::from_utf8_unchecked(val_trimmed) };
170            if let Ok(time) = s.parse::<f32>() {
171                indicators.execute_time = time;
172                has_indicators = true;
173            }
174        }
175
176        // Parse ROWCOUNT
177        if let Some(idx) = memchr::memmem::find(bytes, b"ROWCOUNT:")
178            && let Some(end) = memchr(b'(', &bytes[idx..])
179        {
180            let val_bytes = &bytes[idx + 9..idx + end];
181            let val_trimmed = trim(val_bytes);
182            if let Some(count) = atoi::<u32>(val_trimmed) {
183                indicators.row_count = count;
184                has_indicators = true;
185            }
186        }
187
188        // Parse EXEC_ID
189        if let Some(idx) = memchr::memmem::find(bytes, b"EXEC_ID:") {
190            // Ends with . or end of string
191            let suffix = &bytes[idx + 8..];
192            let end = memchr(b'.', suffix).unwrap_or(suffix.len());
193            let val_bytes = &suffix[..end];
194            let val_trimmed = trim(val_bytes);
195            if let Some(id) = atoi::<i64>(val_trimmed) {
196                indicators.execute_id = id;
197                has_indicators = true;
198            }
199        }
200
201        if has_indicators {
202            Some(indicators)
203        } else {
204            None
205        }
206    }
207
208    /// 解析元数据
209    pub fn parse_meta(&self) -> MetaParts<'a> {
210        let meta_bytes = self.meta_raw.as_bytes();
211        let mut meta = MetaParts::default();
212        let mut idx = 0;
213        let len = meta_bytes.len();
214
215        while idx < len {
216            // Skip whitespace
217            while idx < len && meta_bytes[idx].is_ascii_whitespace() {
218                idx += 1;
219            }
220            if idx >= len {
221                break;
222            }
223
224            let start = idx;
225            // Find end of token using memchr for space (optimization)
226            let end = match memchr(b' ', &meta_bytes[idx..]) {
227                Some(i) => idx + i,
228                None => len,
229            };
230
231            let part = &meta_bytes[start..end];
232            idx = end;
233
234            if part.starts_with(b"EP[") && part.ends_with(b"]") {
235                // EP[0]
236                let num_bytes = &part[3..part.len() - 1];
237                if let Some(ep) = atoi::<u8>(num_bytes) {
238                    meta.ep = ep;
239                }
240                continue;
241            }
242
243            if let Some(sep_idx) = memchr(b':', part) {
244                let key = &part[0..sep_idx];
245                let val = &part[sep_idx + 1..];
246
247                // Helper to convert bytes to Cow using unsafe for known ASCII keys
248                let to_cow_trusted = |bytes: &[u8]| -> Cow<'a, str> {
249                    // We need to extend the lifetime of bytes to 'a.
250                    // Since meta_raw is Cow<'a, str>, if it's Borrowed, the bytes are &'a [u8].
251                    // If it's Owned, we can't return Cow::Borrowed referencing it easily without unsafe.
252                    // But wait, self.meta_raw is Cow<'a, str>.
253                    // If self.meta_raw is Borrowed(&'a str), then bytes are from that slice, so they are &'a [u8].
254                    // If self.meta_raw is Owned(String), then bytes are from that String. We can't return Cow::Borrowed(&'a str) pointing to it.
255
256                    // This is the tricky part of lazy parsing with Cow.
257                    // If we have Owned data, we must return Owned data or clone.
258                    // But parse_meta returns MetaParts<'a>.
259
260                    // If self.meta_raw is Borrowed, we can return Borrowed.
261                    // If self.meta_raw is Owned, we MUST return Owned.
262
263                    match &self.meta_raw {
264                        Cow::Borrowed(_) => unsafe {
265                            // Reconstruct the lifetime 'a
266                            // We know bytes points into self.meta_raw which is 'a
267                            let ptr = bytes.as_ptr();
268                            let len = bytes.len();
269                            let slice = std::slice::from_raw_parts(ptr, len);
270                            Cow::Borrowed(std::str::from_utf8_unchecked(slice))
271                        },
272                        Cow::Owned(_) => {
273                            // We must allocate
274                            unsafe { Cow::Owned(std::str::from_utf8_unchecked(bytes).to_string()) }
275                        }
276                    }
277                };
278
279                let to_cow = |bytes: &[u8]| -> Cow<'a, str> {
280                    match &self.meta_raw {
281                        Cow::Borrowed(_) => match simd_from_utf8(bytes) {
282                            Ok(_) => unsafe {
283                                let ptr = bytes.as_ptr();
284                                let len = bytes.len();
285                                let slice = std::slice::from_raw_parts(ptr, len);
286                                Cow::Borrowed(std::str::from_utf8_unchecked(slice))
287                            },
288                            Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
289                        },
290                        Cow::Owned(_) => match simd_from_utf8(bytes) {
291                            Ok(s) => Cow::Owned(s.to_string()),
292                            Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
293                        },
294                    }
295                };
296
297                match key {
298                    b"sess" => meta.sess_id = to_cow_trusted(val),
299                    b"thrd" => meta.thrd_id = to_cow_trusted(val),
300                    b"user" => meta.username = to_cow(val),
301                    b"trxid" => meta.trxid = to_cow_trusted(val),
302                    b"stmt" => meta.statement = to_cow_trusted(val),
303                    b"appname" => {
304                        if val.is_empty() {
305                            let mut next_idx = idx;
306                            while next_idx < len && meta_bytes[next_idx].is_ascii_whitespace() {
307                                next_idx += 1;
308                            }
309                            if next_idx < len {
310                                let next_start = next_idx;
311                                let next_end = match memchr(b' ', &meta_bytes[next_idx..]) {
312                                    Some(i) => next_idx + i,
313                                    None => len,
314                                };
315                                let next_part = &meta_bytes[next_start..next_end];
316
317                                if next_part.starts_with(b"ip:") && !next_part.starts_with(b"ip::")
318                                {
319                                    // Next part is ip key
320                                } else {
321                                    meta.appname = to_cow(next_part);
322                                    idx = next_end;
323                                }
324                            }
325                        } else {
326                            meta.appname = to_cow(val);
327                        }
328                    }
329                    b"ip" => {
330                        meta.client_ip = to_cow_trusted(val);
331                    }
332                    _ => {}
333                }
334            }
335        }
336        meta
337    }
338}
339
/// Parsed metadata of a log record.
///
/// Contains all metadata fields of a record, such as the session ID and the user name.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MetaParts<'a> {
    /// EP (Execution Point) number, range 0-255.
    pub ep: u8,

    /// Session ID.
    pub sess_id: Cow<'a, str>,

    /// Thread ID.
    pub thrd_id: Cow<'a, str>,

    /// User name.
    pub username: Cow<'a, str>,

    /// Transaction ID.
    pub trxid: Cow<'a, str>,

    /// Statement ID.
    pub statement: Cow<'a, str>,

    /// Application name.
    pub appname: Cow<'a, str>,

    /// Client IP address (optional).
    pub client_ip: Cow<'a, str>,
}
369
/// Parsed performance indicators of a log record.
///
/// Contains the performance figures of a SQL execution, such as the execution time
/// and the number of affected rows.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct IndicatorsParts {
    /// Execution time in milliseconds.
    pub execute_time: f32,

    /// Number of affected rows.
    pub row_count: u32,

    /// Execution ID.
    pub execute_id: i64,
}
383    /// 执行 ID
384    pub execute_id: i64,
385}