Skip to main content

dm_database_parser_sqllog/
sqllog.rs

1use atoi::atoi;
2use encoding::DecoderTrap;
3use encoding::Encoding;
4use encoding::all::GB18030;
5use memchr::{memchr, memrchr};
6use simdutf8::basic::from_utf8 as simd_from_utf8;
7use std::borrow::Cow;
8
9use crate::parser::FileEncodingHint;
10
/// A single SQL log record.
///
/// Represents one complete SQL log record: the timestamp, the metadata, the SQL statement
/// body and the optional performance indicators.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Sqllog<'a> {
    /// Timestamp, formatted as "YYYY-MM-DD HH:MM:SS.mmm"
    pub ts: Cow<'a, str>,

    /// Raw metadata text (parsed lazily, see `parse_meta`)
    pub meta_raw: Cow<'a, str>,

    /// Raw content (body followed by the indicators); split and decoded lazily
    pub content_raw: Cow<'a, [u8]>,

    /// Extracted square-bracket tag (for example [SEL] or [ORA]); `None` when absent
    pub tag: Option<Cow<'a, str>>,

    /// File-level encoding hint (detected by the parser), used to decode the content correctly
    pub encoding: FileEncodingHint,
}
31
32impl<'a> Sqllog<'a> {
33    // ── 公开 API ─────────────────────────────────────────────────────────────
34
35    /// 获取 SQL 语句体(延迟分割)
36    pub fn body(&self) -> Cow<'a, str> {
37        let split = self.find_indicators_split();
38        let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
39        // SAFETY: body_bytes 是 content_raw 的子切片,与 content_raw 共享 'a 生命周期
40        unsafe { decode_content_bytes(&self.content_raw[..split], is_borrowed, self.encoding) }
41    }
42
    /// Return the length in bytes of the SQL statement body.
    ///
    /// No UTF-8 validation is performed and nothing is allocated; the value is the offset
    /// returned by `find_indicators_split`.
    #[inline]
    pub fn body_len(&self) -> usize {
        self.find_indicators_split()
    }
48
49    /// 获取 SQL 语句体的原始字节切片(不分配)
50    #[inline]
51    pub fn body_bytes(&self) -> &[u8] {
52        &self.content_raw[..self.find_indicators_split()]
53    }
54
55    /// 获取原始性能指标字符串(延迟分割)
56    pub fn indicators_raw(&self) -> Option<Cow<'a, str>> {
57        let split = self.find_indicators_split();
58        let ind_bytes = &self.content_raw[split..];
59        if ind_bytes.is_empty() {
60            return None;
61        }
62        let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
63        // SAFETY: ind_bytes 是 content_raw 的子切片,与 content_raw 共享 'a 生命周期
64        Some(unsafe { decode_content_bytes(ind_bytes, is_borrowed, self.encoding) })
65    }
66
67    /// 解析性能指标(sql 字段为空字符串)
68    pub fn parse_indicators(&self) -> Option<PerformanceMetrics<'static>> {
69        let ind_bytes = &self.content_raw[self.find_indicators_split()..];
70        if ind_bytes.is_empty() {
71            return None;
72        }
73        parse_indicators_from_bytes(ind_bytes)
74    }
75
76    /// 解析性能指标和 SQL 语句
77    ///
78    /// 返回包含 EXECTIME、ROWCOUNT、EXEC_ID 和 SQL 语句的 [`PerformanceMetrics`]。
79    ///
80    /// 当 tag 为 `"ORA"` 时,SQL 语句开头可能带有 `": "`,本方法会自动去除。
81    ///
82    /// # 实现说明
83    /// 仅调用一次 `find_indicators_split()`,body 解码与 indicators 解析均在同一
84    /// 次遍历中完成,`Cow::Borrowed` 路径全程零分配。
85    pub fn parse_performance_metrics(&self) -> PerformanceMetrics<'a> {
86        let split = self.find_indicators_split();
87        let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
88
89        // SAFETY: 子切片与 content_raw 共享 'a 生命周期
90        let sql_raw =
91            unsafe { decode_content_bytes(&self.content_raw[..split], is_borrowed, self.encoding) };
92
93        let sql = if self.tag.as_deref() == Some("ORA") {
94            strip_ora_prefix(sql_raw)
95        } else {
96            sql_raw
97        };
98
99        let mut pm = parse_indicators_from_bytes(&self.content_raw[split..]).unwrap_or_default();
100        pm.sql = sql;
101        pm
102    }
103
104    /// 解析元数据
105    pub fn parse_meta(&self) -> MetaParts<'a> {
106        let meta_bytes = self.meta_raw.as_bytes();
107        let mut meta = MetaParts::default();
108        let len = meta_bytes.len();
109        let is_borrowed = matches!(&self.meta_raw, Cow::Borrowed(_));
110
111        let to_cow = |bytes: &[u8]| -> Cow<'a, str> {
112            if is_borrowed {
113                unsafe {
114                    Cow::Borrowed(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
115                        bytes.as_ptr(),
116                        bytes.len(),
117                    )))
118                }
119            } else {
120                unsafe { Cow::Owned(std::str::from_utf8_unchecked(bytes).to_string()) }
121            }
122        };
123
124        let mut idx = 0;
125        while idx < len {
126            // Skip whitespace
127            while idx < len && meta_bytes[idx] == b' ' {
128                idx += 1;
129            }
130            if idx >= len {
131                break;
132            }
133
134            // Find token end
135            let start = idx;
136            while idx < len && meta_bytes[idx] != b' ' {
137                idx += 1;
138            }
139            let part = &meta_bytes[start..idx];
140
141            // Parse EP[n]
142            if part.len() > 4
143                && part[0] == b'E'
144                && part[1] == b'P'
145                && part[2] == b'['
146                && part[part.len() - 1] == b']'
147            {
148                if let Some(ep) = atoi::<u8>(&part[3..part.len() - 1]) {
149                    meta.ep = ep;
150                }
151                continue;
152            }
153
154            // Find ':'
155            if let Some(sep) = memchr(b':', part) {
156                let key = &part[..sep];
157                let val = &part[sep + 1..];
158
159                match key {
160                    b"sess" => meta.sess_id = to_cow(val),
161                    b"thrd" => meta.thrd_id = to_cow(val),
162                    b"user" => meta.username = to_cow(val),
163                    b"trxid" => meta.trxid = to_cow(val),
164                    b"stmt" => meta.statement = to_cow(val),
165                    b"ip" => meta.client_ip = to_cow(val),
166                    b"appname" => {
167                        if !val.is_empty() {
168                            meta.appname = to_cow(val);
169                        } else {
170                            // Peek next token; treat it as appname only if it is not an ip field
171                            let mut peek = idx;
172                            while peek < len && meta_bytes[peek] == b' ' {
173                                peek += 1;
174                            }
175                            if peek < len {
176                                let peek_start = peek;
177                                while peek < len && meta_bytes[peek] != b' ' {
178                                    peek += 1;
179                                }
180                                let next = &meta_bytes[peek_start..peek];
181                                if !(next.starts_with(b"ip:") || next.starts_with(b"ip::")) {
182                                    meta.appname = to_cow(next);
183                                    idx = peek;
184                                }
185                            }
186                        }
187                    }
188                    _ => {}
189                }
190            }
191        }
192        meta
193    }
194
195    // ── Private helpers ───────────────────────────────────────────────────────
196
197    fn find_indicators_split(&self) -> usize {
198        let data = &self.content_raw;
199        let len = data.len();
200        let start = len.saturating_sub(256);
201        let window = &data[start..len];
202        let mut tail = window.len();
203
204        // Search backwards for each keyword in decreasing order of position
205        for keyword in [
206            b"EXEC_ID".as_ref(),
207            b"ROWCOUNT".as_ref(),
208            b"EXECTIME".as_ref(),
209        ] {
210            tail = find_keyword_end_backward(window, tail, keyword).unwrap_or(tail);
211        }
212
213        start + tail
214    }
215}
216
217// ── Module-level helpers ──────────────────────────────────────────────────────
218
219/// Decode a sub-slice of `content_raw` bytes into a `Cow<'a, str>`.
220///
221/// # Safety
222/// `bytes` must be a sub-slice of a `'a`-lived allocation (i.e., the original
223/// `Cow::Borrowed(&'a [u8])`). The caller guarantees this by passing `is_borrowed = true`
224/// only when the source `Cow` is `Borrowed`.
225#[inline]
226unsafe fn decode_content_bytes<'a>(
227    bytes: &[u8],
228    is_borrowed: bool,
229    encoding: FileEncodingHint,
230) -> Cow<'a, str> {
231    match encoding {
232        FileEncodingHint::Utf8 | FileEncodingHint::Auto => match simd_from_utf8(bytes) {
233            Ok(s) => {
234                if is_borrowed {
235                    unsafe {
236                        Cow::Borrowed(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
237                            bytes.as_ptr(),
238                            bytes.len(),
239                        )))
240                    }
241                } else {
242                    Cow::Owned(s.to_string())
243                }
244            }
245            Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
246        },
247        FileEncodingHint::Gb18030 => match GB18030.decode(bytes, DecoderTrap::Strict) {
248            Ok(s) => Cow::Owned(s),
249            Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
250        },
251    }
252}
253
254/// Scan `window[..within]` backwards for `keyword:` followed by a space.
255/// Returns the offset just before the keyword (the new split boundary) if found.
256#[inline]
257fn find_keyword_end_backward(window: &[u8], within: usize, keyword: &[u8]) -> Option<usize> {
258    let klen = keyword.len();
259    let mut search_end = within;
260    while let Some(idx) = memrchr(b':', &window[..search_end]) {
261        if idx >= klen
262            && &window[idx - klen..idx] == keyword
263            && idx + 1 < window.len()
264            && window[idx + 1] == b' '
265        {
266            return Some(idx - klen);
267        }
268        if idx == 0 {
269            break;
270        }
271        search_end = idx;
272    }
273    None
274}
275
276/// Parse `EXECTIME`, `ROWCOUNT`, `EXEC_ID` from a raw indicators byte slice.
277/// The `sql` field of the returned struct is left as the default empty string.
278/// Returns `None` if none of the three fields are present.
279fn parse_indicators_from_bytes(ind: &[u8]) -> Option<PerformanceMetrics<'static>> {
280    if ind.is_empty() {
281        return None;
282    }
283
284    let mut out = PerformanceMetrics::default();
285    let mut found = false;
286
287    if let Some(idx) = memchr::memmem::find(ind, b"EXECTIME:") {
288        let ss = idx + 9;
289        if let Some(pi) = memchr(b'(', &ind[ss..]) {
290            let val = trim_ascii(&ind[ss..ss + pi]);
291            if let Ok(t) = unsafe { std::str::from_utf8_unchecked(val) }.parse::<f32>() {
292                out.exectime = t;
293                found = true;
294            }
295        }
296    }
297
298    if let Some(idx) = memchr::memmem::find(ind, b"ROWCOUNT:") {
299        let ss = idx + 9;
300        if let Some(pi) = memchr(b'(', &ind[ss..])
301            && let Some(c) = atoi::<u32>(trim_ascii(&ind[ss..ss + pi]))
302        {
303            out.rowcount = c;
304            found = true;
305        }
306    }
307
308    if let Some(idx) = memchr::memmem::find(ind, b"EXEC_ID:") {
309        let ss = idx + 8;
310        let end = memchr(b'.', &ind[ss..])
311            .map(|i| ss + i)
312            .unwrap_or(ind.len());
313        if let Some(id) = atoi::<i64>(trim_ascii(&ind[ss..end])) {
314            out.exec_id = id;
315            found = true;
316        }
317    }
318
319    found.then_some(out)
320}
321
/// Strip a leading `": "` prefix from a `Cow<str>`.
///
/// Neither path allocates: a borrowed input is re-sliced, and an owned input has the
/// prefix removed in place so its existing buffer is reused. Input without the prefix is
/// returned unchanged.
#[inline]
fn strip_ora_prefix(s: Cow<'_, str>) -> Cow<'_, str> {
    const PREFIX: &str = ": ";
    match s {
        Cow::Borrowed(text) => Cow::Borrowed(text.strip_prefix(PREFIX).unwrap_or(text)),
        Cow::Owned(mut text) => {
            if text.starts_with(PREFIX) {
                // Shift the remainder down inside the existing allocation instead of
                // copying it into a brand-new `String`.
                text.drain(..PREFIX.len());
            }
            Cow::Owned(text)
        }
    }
}
333
/// Trim ASCII space characters (`b' '` only) from both ends of a byte slice.
///
/// Nothing is allocated; the result is a sub-slice of `b`. Other whitespace such as tabs
/// or newlines is left untouched.
#[inline]
fn trim_ascii(b: &[u8]) -> &[u8] {
    match b.iter().position(|&byte| byte != b' ') {
        // Empty or all spaces: return the empty slice at the end of the input.
        None => &b[b.len()..],
        Some(first) => {
            let rest = &b[first..];
            let kept = rest
                .iter()
                .rposition(|&byte| byte != b' ')
                .map_or(0, |last| last + 1);
            &rest[..kept]
        }
    }
}
347
348// ── Public types ──────────────────────────────────────────────────────────────
349
/// Metadata section of a log record.
///
/// Holds all metadata fields of a log record, such as the session ID and the user name.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MetaParts<'a> {
    /// EP (Execution Point) number, range 0-255
    pub ep: u8,

    /// Session ID
    pub sess_id: Cow<'a, str>,

    /// Thread ID
    pub thrd_id: Cow<'a, str>,

    /// User name
    pub username: Cow<'a, str>,

    /// Transaction ID
    pub trxid: Cow<'a, str>,

    /// Statement ID
    pub statement: Cow<'a, str>,

    /// Application name
    pub appname: Cow<'a, str>,

    /// Client IP address (optional)
    pub client_ip: Cow<'a, str>,
}
379
/// Performance indicators and SQL statement of a record.
///
/// Holds the performance indicators of a SQL execution — execution time, affected row
/// count and execution ID — together with the full SQL statement.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct PerformanceMetrics<'a> {
    /// Execution time (milliseconds)
    pub exectime: f32,

    /// Number of affected rows
    pub rowcount: u32,

    /// Execution ID
    pub exec_id: i64,

    /// Full SQL statement
    pub sql: Cow<'a, str>,
}