// dm_database_parser_sqllog/sqllog.rs
use atoi::atoi;
2use encoding::DecoderTrap;
3use encoding::Encoding;
4use encoding::all::GB18030;
5use memchr::memmem::Finder;
6use memchr::{memchr, memrchr};
7use simdutf8::basic::from_utf8 as simd_from_utf8;
8use std::borrow::Cow;
9use std::sync::LazyLock;
10
11use crate::parser::FileEncodingHint;
12
13static FINDER_EXECTIME: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"EXECTIME:"));
15static FINDER_ROWCOUNT: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"ROWCOUNT:"));
16static FINDER_EXEC_ID: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"EXEC_ID:"));
17
18#[derive(Debug, Clone, PartialEq, Default)]
22pub struct Sqllog<'a> {
23 pub ts: Cow<'a, str>,
25
26 pub meta_raw: Cow<'a, str>,
28
29 pub content_raw: Cow<'a, [u8]>,
31
32 pub tag: Option<Cow<'a, str>>,
34
35 pub(crate) encoding: FileEncodingHint,
37}
38
39impl<'a> Sqllog<'a> {
40 pub fn body(&self) -> Cow<'a, str> {
44 let split = self.find_indicators_split();
45 let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
46 unsafe { decode_content_bytes(&self.content_raw[..split], is_borrowed, self.encoding) }
48 }
49
50 #[inline]
52 pub fn body_len(&self) -> usize {
53 self.find_indicators_split()
54 }
55
56 #[inline]
58 pub fn body_bytes(&self) -> &[u8] {
59 &self.content_raw[..self.find_indicators_split()]
60 }
61
62 pub fn indicators_raw(&self) -> Option<Cow<'a, str>> {
64 let split = self.find_indicators_split();
65 let ind_bytes = &self.content_raw[split..];
66 if ind_bytes.is_empty() {
67 return None;
68 }
69 let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
70 Some(unsafe { decode_content_bytes(ind_bytes, is_borrowed, self.encoding) })
72 }
73
74 pub fn parse_indicators(&self) -> Option<PerformanceMetrics<'static>> {
76 let ind_bytes = &self.content_raw[self.find_indicators_split()..];
77 if ind_bytes.is_empty() {
78 return None;
79 }
80 parse_indicators_from_bytes(ind_bytes)
81 }
82
83 pub fn parse_performance_metrics(&self) -> PerformanceMetrics<'a> {
93 let split = self.find_indicators_split();
94 let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
95
96 let sql_raw =
98 unsafe { decode_content_bytes(&self.content_raw[..split], is_borrowed, self.encoding) };
99
100 let sql = if self.tag.as_deref() == Some("ORA") {
101 strip_ora_prefix(sql_raw)
102 } else {
103 sql_raw
104 };
105
106 let mut pm = parse_indicators_from_bytes(&self.content_raw[split..]).unwrap_or_default();
107 pm.sql = sql;
108 pm
109 }
110
111 pub fn parse_meta(&self) -> MetaParts<'a> {
113 let meta_bytes = self.meta_raw.as_bytes();
114 let mut meta = MetaParts::default();
115 let len = meta_bytes.len();
116 let is_borrowed = matches!(&self.meta_raw, Cow::Borrowed(_));
117
118 let to_cow = |bytes: &[u8]| -> Cow<'a, str> {
119 if is_borrowed {
120 unsafe {
121 Cow::Borrowed(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
122 bytes.as_ptr(),
123 bytes.len(),
124 )))
125 }
126 } else {
127 unsafe { Cow::Owned(std::str::from_utf8_unchecked(bytes).to_string()) }
128 }
129 };
130
131 let mut idx = 0;
132 while idx < len {
133 while idx < len && meta_bytes[idx] == b' ' {
135 idx += 1;
136 }
137 if idx >= len {
138 break;
139 }
140
141 let start = idx;
143 while idx < len && meta_bytes[idx] != b' ' {
144 idx += 1;
145 }
146 let part = &meta_bytes[start..idx];
147
148 if part.len() > 4
150 && part[0] == b'E'
151 && part[1] == b'P'
152 && part[2] == b'['
153 && part[part.len() - 1] == b']'
154 {
155 if let Some(ep) = atoi::<u8>(&part[3..part.len() - 1]) {
156 meta.ep = ep;
157 }
158 continue;
159 }
160
161 if let Some(sep) = memchr(b':', part) {
163 let key = &part[..sep];
164 let val = &part[sep + 1..];
165
166 match key {
167 b"sess" => meta.sess_id = to_cow(val),
168 b"thrd" => meta.thrd_id = to_cow(val),
169 b"user" => meta.username = to_cow(val),
170 b"trxid" => meta.trxid = to_cow(val),
171 b"stmt" => meta.statement = to_cow(val),
172 b"ip" => meta.client_ip = to_cow(val),
173 b"appname" => {
174 if !val.is_empty() {
175 meta.appname = to_cow(val);
176 } else {
177 let mut peek = idx;
179 while peek < len && meta_bytes[peek] == b' ' {
180 peek += 1;
181 }
182 if peek < len {
183 let peek_start = peek;
184 while peek < len && meta_bytes[peek] != b' ' {
185 peek += 1;
186 }
187 let next = &meta_bytes[peek_start..peek];
188 if !(next.starts_with(b"ip:") || next.starts_with(b"ip::")) {
189 meta.appname = to_cow(next);
190 idx = peek;
191 }
192 }
193 }
194 }
195 _ => {}
196 }
197 }
198 }
199 meta
200 }
201
202 fn find_indicators_split(&self) -> usize {
205 let data = &self.content_raw;
206 let len = data.len();
207 let start = len.saturating_sub(256);
208 let window = &data[start..len];
209 let mut tail = window.len();
210
211 for keyword in [
213 b"EXEC_ID".as_ref(),
214 b"ROWCOUNT".as_ref(),
215 b"EXECTIME".as_ref(),
216 ] {
217 tail = find_keyword_end_backward(window, tail, keyword).unwrap_or(tail);
218 }
219
220 start + tail
221 }
222}
223
224#[inline]
233unsafe fn decode_content_bytes<'a>(
234 bytes: &[u8],
235 is_borrowed: bool,
236 encoding: FileEncodingHint,
237) -> Cow<'a, str> {
238 match encoding {
239 FileEncodingHint::Utf8 | FileEncodingHint::Auto => match simd_from_utf8(bytes) {
240 Ok(s) => {
241 if is_borrowed {
242 unsafe {
243 Cow::Borrowed(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
244 bytes.as_ptr(),
245 bytes.len(),
246 )))
247 }
248 } else {
249 Cow::Owned(s.to_string())
250 }
251 }
252 Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
253 },
254 FileEncodingHint::Gb18030 => match GB18030.decode(bytes, DecoderTrap::Strict) {
255 Ok(s) => Cow::Owned(s),
256 Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
257 },
258 }
259}
260
261#[inline]
264fn find_keyword_end_backward(window: &[u8], within: usize, keyword: &[u8]) -> Option<usize> {
265 let klen = keyword.len();
266 let mut search_end = within;
267 while let Some(idx) = memrchr(b':', &window[..search_end]) {
268 if idx >= klen
269 && &window[idx - klen..idx] == keyword
270 && idx + 1 < window.len()
271 && window[idx + 1] == b' '
272 {
273 return Some(idx - klen);
274 }
275 if idx == 0 {
276 break;
277 }
278 search_end = idx;
279 }
280 None
281}
282
283fn parse_indicators_from_bytes(ind: &[u8]) -> Option<PerformanceMetrics<'static>> {
287 if ind.is_empty() {
288 return None;
289 }
290
291 let mut out = PerformanceMetrics::default();
292 let mut found = false;
293
294 if let Some(idx) = FINDER_EXECTIME.find(ind) {
295 let ss = idx + 9;
296 if let Some(pi) = memchr(b'(', &ind[ss..]) {
297 let val = ind[ss..ss + pi].trim_ascii();
298 if let Ok(t) = unsafe { std::str::from_utf8_unchecked(val) }.parse::<f32>() {
299 out.exectime = t;
300 found = true;
301 }
302 }
303 }
304
305 if let Some(idx) = FINDER_ROWCOUNT.find(ind) {
306 let ss = idx + 9;
307 if let Some(pi) = memchr(b'(', &ind[ss..])
308 && let Some(c) = atoi::<u32>(ind[ss..ss + pi].trim_ascii())
309 {
310 out.rowcount = c;
311 found = true;
312 }
313 }
314
315 if let Some(idx) = FINDER_EXEC_ID.find(ind) {
316 let ss = idx + 8;
317 let end = memchr(b'.', &ind[ss..])
318 .map(|i| ss + i)
319 .unwrap_or(ind.len());
320 if let Some(id) = atoi::<i64>(ind[ss..end].trim_ascii()) {
321 out.exec_id = id;
322 found = true;
323 }
324 }
325
326 found.then_some(out)
327}
328
/// Removes a single leading `": "` from `s`, if present.
///
/// A borrowed input stays borrowed (the result is a sub-slice); an owned input is
/// edited in place and stays owned. Input without the prefix is returned
/// unchanged.
#[inline]
fn strip_ora_prefix(s: Cow<'_, str>) -> Cow<'_, str> {
    const PREFIX: &str = ": ";

    if !s.starts_with(PREFIX) {
        return s;
    }
    match s {
        Cow::Borrowed(text) => Cow::Borrowed(&text[PREFIX.len()..]),
        Cow::Owned(mut text) => {
            text.replace_range(..PREFIX.len(), "");
            Cow::Owned(text)
        }
    }
}
342
/// Fields extracted from a record's meta section by `Sqllog::parse_meta`.
///
/// A field whose token does not occur in the meta text keeps its default value
/// (`0` for `ep`, an empty string for everything else).
#[derive(Clone, Debug, Default, PartialEq)]
pub struct MetaParts<'a> {
    /// Number found inside an `EP[n]` token.
    pub ep: u8,

    /// Value of the `sess:` token.
    pub sess_id: Cow<'a, str>,

    /// Value of the `thrd:` token.
    pub thrd_id: Cow<'a, str>,

    /// Value of the `user:` token.
    pub username: Cow<'a, str>,

    /// Value of the `trxid:` token.
    pub trxid: Cow<'a, str>,

    /// Value of the `stmt:` token.
    pub statement: Cow<'a, str>,

    /// Value of the `appname:` token (or of the token following an empty
    /// `appname:`, unless that token is the `ip:` field).
    pub appname: Cow<'a, str>,

    /// Value of the `ip:` token.
    pub client_ip: Cow<'a, str>,
}
374
/// Metrics parsed from a record's trailing indicators section, together with the
/// statement text.
///
/// Indicators that are absent or unparsable keep their default value (`0` /
/// `0.0`).
#[derive(Clone, Debug, Default, PartialEq)]
pub struct PerformanceMetrics<'a> {
    /// Value of the `EXECTIME:` indicator.
    /// NOTE(review): the unit is not visible in this file - confirm against the
    /// log format.
    pub exectime: f32,

    /// Value of the `ROWCOUNT:` indicator.
    pub rowcount: u32,

    /// Value of the `EXEC_ID:` indicator.
    pub exec_id: i64,

    /// Statement text. Filled in by `Sqllog::parse_performance_metrics`; left
    /// empty when only the indicators are parsed.
    pub sql: Cow<'a, str>,
}