dm_database_parser_sqllog/
sqllog.rs1use atoi::atoi;
2use encoding::DecoderTrap;
3use encoding::Encoding;
4use encoding::all::GB18030;
5use memchr::{memchr, memrchr};
6use simdutf8::basic::from_utf8 as simd_from_utf8;
7use std::borrow::Cow;
8
9use crate::parser::FileEncodingHint;
10
11#[derive(Debug, Clone, PartialEq, Default)]
15pub struct Sqllog<'a> {
16 pub ts: Cow<'a, str>,
18
19 pub meta_raw: Cow<'a, str>,
21
22 pub content_raw: Cow<'a, [u8]>,
24
25 pub tag: Option<Cow<'a, str>>,
27
28 pub encoding: FileEncodingHint,
30}
31
32impl<'a> Sqllog<'a> {
33 pub fn body(&self) -> Cow<'a, str> {
37 let split = self.find_indicators_split();
38 let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
39 unsafe { decode_content_bytes(&self.content_raw[..split], is_borrowed, self.encoding) }
41 }
42
43 #[inline]
45 pub fn body_len(&self) -> usize {
46 self.find_indicators_split()
47 }
48
49 #[inline]
51 pub fn body_bytes(&self) -> &[u8] {
52 &self.content_raw[..self.find_indicators_split()]
53 }
54
55 pub fn indicators_raw(&self) -> Option<Cow<'a, str>> {
57 let split = self.find_indicators_split();
58 let ind_bytes = &self.content_raw[split..];
59 if ind_bytes.is_empty() {
60 return None;
61 }
62 let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
63 Some(unsafe { decode_content_bytes(ind_bytes, is_borrowed, self.encoding) })
65 }
66
67 pub fn parse_indicators(&self) -> Option<PerformanceMetrics<'static>> {
69 let ind_bytes = &self.content_raw[self.find_indicators_split()..];
70 if ind_bytes.is_empty() {
71 return None;
72 }
73 parse_indicators_from_bytes(ind_bytes)
74 }
75
76 pub fn parse_performance_metrics(&self) -> PerformanceMetrics<'a> {
86 let split = self.find_indicators_split();
87 let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
88
89 let sql_raw =
91 unsafe { decode_content_bytes(&self.content_raw[..split], is_borrowed, self.encoding) };
92
93 let sql = if self.tag.as_deref() == Some("ORA") {
94 strip_ora_prefix(sql_raw)
95 } else {
96 sql_raw
97 };
98
99 let mut pm = parse_indicators_from_bytes(&self.content_raw[split..]).unwrap_or_default();
100 pm.sql = sql;
101 pm
102 }
103
104 pub fn parse_meta(&self) -> MetaParts<'a> {
106 let meta_bytes = self.meta_raw.as_bytes();
107 let mut meta = MetaParts::default();
108 let len = meta_bytes.len();
109 let is_borrowed = matches!(&self.meta_raw, Cow::Borrowed(_));
110
111 let to_cow = |bytes: &[u8]| -> Cow<'a, str> {
112 if is_borrowed {
113 unsafe {
114 Cow::Borrowed(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
115 bytes.as_ptr(),
116 bytes.len(),
117 )))
118 }
119 } else {
120 unsafe { Cow::Owned(std::str::from_utf8_unchecked(bytes).to_string()) }
121 }
122 };
123
124 let mut idx = 0;
125 while idx < len {
126 while idx < len && meta_bytes[idx] == b' ' {
128 idx += 1;
129 }
130 if idx >= len {
131 break;
132 }
133
134 let start = idx;
136 while idx < len && meta_bytes[idx] != b' ' {
137 idx += 1;
138 }
139 let part = &meta_bytes[start..idx];
140
141 if part.len() > 4
143 && part[0] == b'E'
144 && part[1] == b'P'
145 && part[2] == b'['
146 && part[part.len() - 1] == b']'
147 {
148 if let Some(ep) = atoi::<u8>(&part[3..part.len() - 1]) {
149 meta.ep = ep;
150 }
151 continue;
152 }
153
154 if let Some(sep) = memchr(b':', part) {
156 let key = &part[..sep];
157 let val = &part[sep + 1..];
158
159 match key {
160 b"sess" => meta.sess_id = to_cow(val),
161 b"thrd" => meta.thrd_id = to_cow(val),
162 b"user" => meta.username = to_cow(val),
163 b"trxid" => meta.trxid = to_cow(val),
164 b"stmt" => meta.statement = to_cow(val),
165 b"ip" => meta.client_ip = to_cow(val),
166 b"appname" => {
167 if !val.is_empty() {
168 meta.appname = to_cow(val);
169 } else {
170 let mut peek = idx;
172 while peek < len && meta_bytes[peek] == b' ' {
173 peek += 1;
174 }
175 if peek < len {
176 let peek_start = peek;
177 while peek < len && meta_bytes[peek] != b' ' {
178 peek += 1;
179 }
180 let next = &meta_bytes[peek_start..peek];
181 if !(next.starts_with(b"ip:") || next.starts_with(b"ip::")) {
182 meta.appname = to_cow(next);
183 idx = peek;
184 }
185 }
186 }
187 }
188 _ => {}
189 }
190 }
191 }
192 meta
193 }
194
195 fn find_indicators_split(&self) -> usize {
198 let data = &self.content_raw;
199 let len = data.len();
200 let start = len.saturating_sub(256);
201 let window = &data[start..len];
202 let mut tail = window.len();
203
204 for keyword in [
206 b"EXEC_ID".as_ref(),
207 b"ROWCOUNT".as_ref(),
208 b"EXECTIME".as_ref(),
209 ] {
210 tail = find_keyword_end_backward(window, tail, keyword).unwrap_or(tail);
211 }
212
213 start + tail
214 }
215}
216
217#[inline]
226unsafe fn decode_content_bytes<'a>(
227 bytes: &[u8],
228 is_borrowed: bool,
229 encoding: FileEncodingHint,
230) -> Cow<'a, str> {
231 match encoding {
232 FileEncodingHint::Utf8 | FileEncodingHint::Auto => match simd_from_utf8(bytes) {
233 Ok(s) => {
234 if is_borrowed {
235 unsafe {
236 Cow::Borrowed(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
237 bytes.as_ptr(),
238 bytes.len(),
239 )))
240 }
241 } else {
242 Cow::Owned(s.to_string())
243 }
244 }
245 Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
246 },
247 FileEncodingHint::Gb18030 => match GB18030.decode(bytes, DecoderTrap::Strict) {
248 Ok(s) => Cow::Owned(s),
249 Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
250 },
251 }
252}
253
254#[inline]
257fn find_keyword_end_backward(window: &[u8], within: usize, keyword: &[u8]) -> Option<usize> {
258 let klen = keyword.len();
259 let mut search_end = within;
260 while let Some(idx) = memrchr(b':', &window[..search_end]) {
261 if idx >= klen
262 && &window[idx - klen..idx] == keyword
263 && idx + 1 < window.len()
264 && window[idx + 1] == b' '
265 {
266 return Some(idx - klen);
267 }
268 if idx == 0 {
269 break;
270 }
271 search_end = idx;
272 }
273 None
274}
275
276fn parse_indicators_from_bytes(ind: &[u8]) -> Option<PerformanceMetrics<'static>> {
280 if ind.is_empty() {
281 return None;
282 }
283
284 let mut out = PerformanceMetrics::default();
285 let mut found = false;
286
287 if let Some(idx) = memchr::memmem::find(ind, b"EXECTIME:") {
288 let ss = idx + 9;
289 if let Some(pi) = memchr(b'(', &ind[ss..]) {
290 let val = trim_ascii(&ind[ss..ss + pi]);
291 if let Ok(t) = unsafe { std::str::from_utf8_unchecked(val) }.parse::<f32>() {
292 out.exectime = t;
293 found = true;
294 }
295 }
296 }
297
298 if let Some(idx) = memchr::memmem::find(ind, b"ROWCOUNT:") {
299 let ss = idx + 9;
300 if let Some(pi) = memchr(b'(', &ind[ss..])
301 && let Some(c) = atoi::<u32>(trim_ascii(&ind[ss..ss + pi]))
302 {
303 out.rowcount = c;
304 found = true;
305 }
306 }
307
308 if let Some(idx) = memchr::memmem::find(ind, b"EXEC_ID:") {
309 let ss = idx + 8;
310 let end = memchr(b'.', &ind[ss..])
311 .map(|i| ss + i)
312 .unwrap_or(ind.len());
313 if let Some(id) = atoi::<i64>(trim_ascii(&ind[ss..end])) {
314 out.exec_id = id;
315 found = true;
316 }
317 }
318
319 found.then_some(out)
320}
321
/// Removes one leading `": "` from `s`, if present.
///
/// A borrowed input stays borrowed (a sub-slice is returned). An owned input
/// is trimmed in place, so no second allocation is made. Input without the
/// prefix is returned unchanged.
#[inline]
fn strip_ora_prefix(s: Cow<'_, str>) -> Cow<'_, str> {
    const PREFIX: &str = ": ";

    match s {
        Cow::Borrowed(text) => Cow::Borrowed(text.strip_prefix(PREFIX).unwrap_or(text)),
        Cow::Owned(mut text) => {
            if text.starts_with(PREFIX) {
                // Drop the prefix inside the existing buffer instead of
                // copying the remainder into a new `String`.
                text.replace_range(..PREFIX.len(), "");
            }
            Cow::Owned(text)
        }
    }
}
333
/// Returns `b` without leading and trailing ASCII space (`b' '`) bytes.
///
/// Only the space byte is removed; tabs, newlines and other whitespace are
/// kept.
#[inline]
fn trim_ascii(b: &[u8]) -> &[u8] {
    let mut rest = b;
    // Peel spaces off the front, then off the back.
    while let [b' ', tail @ ..] = rest {
        rest = tail;
    }
    while let [head @ .., b' '] = rest {
        rest = head;
    }
    rest
}
347
/// Fields extracted from a record's metadata section by `Sqllog::parse_meta`.
///
/// Fields whose token is absent from the metadata keep their default value
/// (`0` for `ep`, an empty string for the rest).
#[derive(Clone, Debug, Default, PartialEq)]
pub struct MetaParts<'a> {
    /// Number parsed from the `EP[n]` token.
    pub ep: u8,

    /// Value of the `sess:` token.
    pub sess_id: Cow<'a, str>,

    /// Value of the `thrd:` token.
    pub thrd_id: Cow<'a, str>,

    /// Value of the `user:` token.
    pub username: Cow<'a, str>,

    /// Value of the `trxid:` token.
    pub trxid: Cow<'a, str>,

    /// Value of the `stmt:` token.
    pub statement: Cow<'a, str>,

    /// Value of the `appname:` token (or of the token following it when the
    /// value itself is empty).
    pub appname: Cow<'a, str>,

    /// Value of the `ip:` token.
    pub client_ip: Cow<'a, str>,
}
379
/// Execution indicators of a record, optionally together with its SQL text.
///
/// Values that are missing from the record keep their default (`0` / empty).
#[derive(Clone, Debug, Default, PartialEq)]
pub struct PerformanceMetrics<'a> {
    /// Number following `EXECTIME:` (up to the next `(`).
    ///
    /// NOTE(review): the unit is whatever the log prints in the parentheses,
    /// presumably milliseconds — confirm against the log format.
    pub exectime: f32,

    /// Number following `ROWCOUNT:` (up to the next `(`).
    pub rowcount: u32,

    /// Number following `EXEC_ID:` (up to the next `.`).
    pub exec_id: i64,

    /// Decoded SQL body; filled in by `Sqllog::parse_performance_metrics` and
    /// left empty by the indicator-only parsers.
    pub sql: Cow<'a, str>,
}