dm_database_parser_sqllog/
sqllog.rs1use atoi::atoi;
2use encoding::DecoderTrap;
3use encoding::Encoding;
4use encoding::all::GB18030;
5use memchr::memchr;
6use memchr::memmem::Finder;
7use memchr::memrchr;
8use simdutf8::basic::from_utf8 as simd_from_utf8;
9use std::borrow::Cow;
10use std::sync::LazyLock;
11
12use crate::parser::FileEncodingHint;
13
14static FINDER_EXECTIME: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"EXECTIME:"));
16static FINDER_ROWCOUNT: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"ROWCOUNT:"));
17static FINDER_EXEC_ID: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"EXEC_ID:"));
18
19const INDICATORS_WINDOW: usize = 256;
23
24#[derive(Debug, Clone, PartialEq, Default)]
28pub struct Sqllog<'a> {
29 pub ts: Cow<'a, str>,
31
32 pub meta_raw: Cow<'a, str>,
34
35 pub content_raw: Cow<'a, [u8]>,
37
38 pub tag: Option<Cow<'a, str>>,
40
41 pub(crate) encoding: FileEncodingHint,
43}
44
45impl<'a> Sqllog<'a> {
46 pub fn body(&self) -> Cow<'a, str> {
50 let split = self.find_indicators_split();
51 let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
52 unsafe { decode_content_bytes(&self.content_raw[..split], is_borrowed, self.encoding) }
54 }
55
56 #[inline]
58 pub fn body_len(&self) -> usize {
59 self.find_indicators_split()
60 }
61
62 #[inline]
64 pub fn body_bytes(&self) -> &[u8] {
65 &self.content_raw[..self.find_indicators_split()]
66 }
67
68 pub fn indicators_raw(&self) -> Option<Cow<'a, str>> {
70 let split = self.find_indicators_split();
71 let ind_bytes = &self.content_raw[split..];
72 if ind_bytes.is_empty() {
73 return None;
74 }
75 let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
76 Some(unsafe { decode_content_bytes(ind_bytes, is_borrowed, self.encoding) })
78 }
79
80 pub fn parse_indicators(&self) -> Option<PerformanceMetrics<'static>> {
82 let ind_bytes = &self.content_raw[self.find_indicators_split()..];
83 if ind_bytes.is_empty() {
84 return None;
85 }
86 parse_indicators_from_bytes(ind_bytes)
87 }
88
89 #[inline(always)]
99 pub fn parse_performance_metrics(&self) -> PerformanceMetrics<'a> {
100 let split = self.find_indicators_split();
101 let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
102
103 let sql_raw =
105 unsafe { decode_content_bytes(&self.content_raw[..split], is_borrowed, self.encoding) };
106
107 let sql = if self.tag.as_deref() == Some("ORA") {
108 strip_ora_prefix(sql_raw)
109 } else {
110 sql_raw
111 };
112
113 let mut pm = parse_indicators_from_bytes(&self.content_raw[split..]).unwrap_or_default();
114 pm.sql = sql;
115 pm
116 }
117
118 pub fn parse_meta(&self) -> MetaParts<'a> {
120 let meta_bytes = self.meta_raw.as_bytes();
121 let mut meta = MetaParts::default();
122 let len = meta_bytes.len();
123 let is_borrowed = matches!(&self.meta_raw, Cow::Borrowed(_));
124
125 let to_cow = |bytes: &[u8]| -> Cow<'a, str> {
126 if is_borrowed {
127 unsafe {
131 Cow::Borrowed(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
132 bytes.as_ptr(),
133 bytes.len(),
134 )))
135 }
136 } else {
137 Cow::Owned(
141 std::str::from_utf8(bytes)
142 .expect("meta_raw is always valid UTF-8")
143 .to_string(),
144 )
145 }
146 };
147
148 let mut idx = 0;
149 while idx < len {
150 while idx < len && meta_bytes[idx] == b' ' {
152 idx += 1;
153 }
154 if idx >= len {
155 break;
156 }
157
158 let start = idx;
160 while idx < len && meta_bytes[idx] != b' ' {
161 idx += 1;
162 }
163 let part = &meta_bytes[start..idx];
164
165 if part.len() > 4
167 && part[0] == b'E'
168 && part[1] == b'P'
169 && part[2] == b'['
170 && part[part.len() - 1] == b']'
171 {
172 if let Some(ep) = atoi::<u8>(&part[3..part.len() - 1]) {
173 meta.ep = ep;
174 }
175 continue;
176 }
177
178 if let Some(sep) = memchr(b':', part) {
180 let key = &part[..sep];
181 let val = &part[sep + 1..];
182
183 match key {
184 b"sess" => meta.sess_id = to_cow(val),
185 b"thrd" => meta.thrd_id = to_cow(val),
186 b"user" => meta.username = to_cow(val),
187 b"trxid" => meta.trxid = to_cow(val),
188 b"stmt" => meta.statement = to_cow(val),
189 b"ip" => meta.client_ip = to_cow(val),
190 b"appname" => {
191 if !val.is_empty() {
192 meta.appname = to_cow(val);
193 } else {
194 let mut peek = idx;
196 while peek < len && meta_bytes[peek] == b' ' {
197 peek += 1;
198 }
199 if peek < len {
200 let peek_start = peek;
201 while peek < len && meta_bytes[peek] != b' ' {
202 peek += 1;
203 }
204 let next = &meta_bytes[peek_start..peek];
205 if !(next.starts_with(b"ip:") || next.starts_with(b"ip::")) {
206 meta.appname = to_cow(next);
207 idx = peek;
208 }
209 }
210 }
211 }
212 _ => {}
213 }
214 }
215 }
216 meta
217 }
218
219 fn find_indicators_split(&self) -> usize {
222 let data = &self.content_raw;
223 let len = data.len();
224
225 let last_meaningful = data
229 .iter()
230 .rev()
231 .find(|&&b| b != b'\n' && b != b'\r')
232 .copied();
233 if last_meaningful != Some(b'.') && last_meaningful != Some(b')') {
234 return len;
235 }
236
237 let start = len.saturating_sub(INDICATORS_WINDOW);
238 let window = &data[start..];
239
240 let earliest = scan_earliest_indicator(window);
243
244 let split = start + earliest;
245 if split < len && parse_indicators_from_bytes(&data[split..]).is_none() {
247 return len;
248 }
249 split
250 }
251}
252
253fn scan_earliest_indicator(window: &[u8]) -> usize {
263 let mut exectime_pos: Option<usize> = None;
265 let mut rowcount_pos: Option<usize> = None;
266 let mut exec_id_pos: Option<usize> = None;
267
268 let mut search_end = window.len();
269 while search_end > 0 {
270 if exectime_pos.is_some() && rowcount_pos.is_some() && exec_id_pos.is_some() {
272 break;
273 }
274 match memrchr(b':', &window[..search_end]) {
275 None => break,
276 Some(colon) => {
277 let prefix = &window[..colon];
278 if exectime_pos.is_none() && prefix.ends_with(b"EXECTIME") {
279 exectime_pos = Some(colon - 8);
280 } else if rowcount_pos.is_none() && prefix.ends_with(b"ROWCOUNT") {
281 rowcount_pos = Some(colon - 8);
282 } else if exec_id_pos.is_none() && prefix.ends_with(b"EXEC_ID") {
283 exec_id_pos = Some(colon - 7);
284 }
285 search_end = colon;
286 }
287 }
288 }
289
290 [exectime_pos, rowcount_pos, exec_id_pos]
292 .into_iter()
293 .flatten()
294 .min()
295 .unwrap_or(window.len())
296}
297
298#[inline]
305unsafe fn decode_content_bytes<'a>(
306 bytes: &[u8],
307 is_borrowed: bool,
308 encoding: FileEncodingHint,
309) -> Cow<'a, str> {
310 match encoding {
311 FileEncodingHint::Utf8 => {
312 if is_borrowed {
314 unsafe {
315 Cow::Borrowed(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
316 bytes.as_ptr(),
317 bytes.len(),
318 )))
319 }
320 } else {
321 unsafe { Cow::Owned(std::str::from_utf8_unchecked(bytes).to_string()) }
322 }
323 }
324 FileEncodingHint::Auto => match simd_from_utf8(bytes) {
325 Ok(_) => {
326 if is_borrowed {
327 unsafe {
328 Cow::Borrowed(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
329 bytes.as_ptr(),
330 bytes.len(),
331 )))
332 }
333 } else {
334 unsafe { Cow::Owned(std::str::from_utf8_unchecked(bytes).to_string()) }
335 }
336 }
337 Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
338 },
339 FileEncodingHint::Gb18030 => match GB18030.decode(bytes, DecoderTrap::Strict) {
340 Ok(s) => Cow::Owned(s),
341 Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
342 },
343 }
344}
345
346fn parse_indicators_from_bytes(ind: &[u8]) -> Option<PerformanceMetrics<'static>> {
350 if ind.is_empty() {
351 return None;
352 }
353
354 let mut out = PerformanceMetrics::default();
355 let mut found = false;
356
357 if let Some(idx) = FINDER_EXECTIME.find(ind) {
358 let ss = idx + 9;
359 if let Some(pi) = memchr(b'(', &ind[ss..]) {
360 let val = ind[ss..ss + pi].trim_ascii();
361 if let Ok(t) = fast_float::parse::<f32, _>(val) {
362 out.exectime = t;
363 found = true;
364 }
365 }
366 }
367
368 if let Some(idx) = FINDER_ROWCOUNT.find(ind) {
369 let ss = idx + 9;
370 if let Some(pi) = memchr(b'(', &ind[ss..])
371 && let Some(c) = atoi::<u32>(ind[ss..ss + pi].trim_ascii())
372 {
373 out.rowcount = c;
374 found = true;
375 }
376 }
377
378 if let Some(idx) = FINDER_EXEC_ID.find(ind) {
379 let ss = idx + 8;
380 let end = memchr(b'.', &ind[ss..])
381 .map(|i| ss + i)
382 .unwrap_or(ind.len());
383 if let Some(id) = atoi::<i64>(ind[ss..end].trim_ascii()) {
384 out.exec_id = id;
385 found = true;
386 }
387 }
388
389 found.then_some(out)
390}
391
/// Removes one leading `": "` from `s`, if present.
///
/// The `Cow` variant is preserved: a borrowed input yields a borrowed
/// sub-slice, an owned input is shortened in place and handed back. Input
/// without the prefix is returned untouched.
#[inline]
fn strip_ora_prefix(s: Cow<'_, str>) -> Cow<'_, str> {
    const ORA_PREFIX: &str = ": ";

    if !s.starts_with(ORA_PREFIX) {
        return s;
    }

    match s {
        Cow::Borrowed(text) => Cow::Borrowed(&text[ORA_PREFIX.len()..]),
        Cow::Owned(mut text) => {
            text.drain(..ORA_PREFIX.len());
            Cow::Owned(text)
        }
    }
}
405
/// Fields extracted from a record's metadata section by
/// `Sqllog::parse_meta`.
///
/// Every field keeps its default (zero / empty string) when the matching
/// token is absent from the metadata.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct MetaParts<'a> {
    /// Number taken from the `EP[n]` token.
    pub ep: u8,

    /// Value of the `sess:` token.
    pub sess_id: Cow<'a, str>,

    /// Value of the `thrd:` token.
    pub thrd_id: Cow<'a, str>,

    /// Value of the `user:` token.
    pub username: Cow<'a, str>,

    /// Value of the `trxid:` token.
    pub trxid: Cow<'a, str>,

    /// Value of the `stmt:` token.
    pub statement: Cow<'a, str>,

    /// Value of the `appname:` token, or the token following an empty
    /// `appname:` when that token is not the `ip:` field.
    pub appname: Cow<'a, str>,

    /// Value of the `ip:` token (everything after the first colon).
    pub client_ip: Cow<'a, str>,
}
437
/// Indicator values of a record plus, optionally, its statement text.
///
/// Values that are missing from the record keep their default (zero / empty
/// string).
#[derive(Clone, Debug, Default, PartialEq)]
pub struct PerformanceMetrics<'a> {
    /// Number following the `EXECTIME:` label.
    pub exectime: f32,

    /// Number following the `ROWCOUNT:` label.
    pub rowcount: u32,

    /// Number following the `EXEC_ID:` label.
    pub exec_id: i64,

    /// Statement text. Filled in by `Sqllog::parse_performance_metrics`;
    /// left empty by `Sqllog::parse_indicators`.
    pub sql: Cow<'a, str>,
}