use atoi::atoi;
use encoding::DecoderTrap;
use encoding::Encoding;
use encoding::all::GB18030;
use memchr::memmem::Finder;
use memchr::{memchr, memrchr};
use simdutf8::basic::from_utf8 as simd_from_utf8;
use std::borrow::Cow;
use std::sync::LazyLock;
use crate::parser::FileEncodingHint;
/// Substring searcher for the `EXECTIME:` indicator key, constructed on first use
/// and reused by `parse_indicators_from_bytes`.
static FINDER_EXECTIME: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"EXECTIME:"));
/// Substring searcher for the `ROWCOUNT:` indicator key, constructed on first use
/// and reused by `parse_indicators_from_bytes`.
static FINDER_ROWCOUNT: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"ROWCOUNT:"));
/// Substring searcher for the `EXEC_ID:` indicator key, constructed on first use
/// and reused by `parse_indicators_from_bytes`.
static FINDER_EXEC_ID: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"EXEC_ID:"));
/// One SQL-log record whose parts are kept raw and decoded/parsed on demand.
///
/// Every field is a `Cow`, so a record can either borrow from an input buffer
/// with lifetime `'a` or own its data.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Sqllog<'a> {
/// Timestamp field of the record (assumed from the name; no method in this
/// file interprets it — TODO confirm format against the parser).
pub ts: Cow<'a, str>,
/// Raw metadata text. `parse_meta` reads it as space-separated tokens:
/// `EP[n]` and `key:value` pairs.
pub meta_raw: Cow<'a, str>,
/// Raw, still-encoded content bytes: the statement body, optionally followed
/// by a trailing `EXECTIME: … ROWCOUNT: … EXEC_ID: …` indicator section.
pub content_raw: Cow<'a, [u8]>,
/// Optional record tag. When it equals `"ORA"`, `parse_performance_metrics`
/// strips a leading `": "` from the decoded statement.
pub tag: Option<Cow<'a, str>>,
/// Encoding hint that selects how `content_raw` is decoded into text.
pub encoding: FileEncodingHint,
}
impl<'a> Sqllog<'a> {
    /// Returns the statement text: everything in `content_raw` that precedes the
    /// trailing indicator section, decoded according to `encoding`.
    ///
    /// The result borrows from the underlying buffer when `content_raw` is
    /// borrowed and the bytes decode as UTF-8 under a UTF-8/auto hint; in every
    /// other case a new `String` is allocated.
    pub fn body(&self) -> Cow<'a, str> {
        self.decode_content_span(0, self.find_indicators_split())
    }

    /// Returns the length in bytes of the (still encoded) statement body.
    #[inline]
    pub fn body_len(&self) -> usize {
        self.find_indicators_split()
    }

    /// Returns the raw, undecoded bytes of the statement body.
    #[inline]
    pub fn body_bytes(&self) -> &[u8] {
        &self.content_raw[..self.find_indicators_split()]
    }

    /// Returns the trailing indicator section as decoded text, or `None` when
    /// no indicator keyword was located (the section is empty).
    pub fn indicators_raw(&self) -> Option<Cow<'a, str>> {
        let split = self.find_indicators_split();
        let end = self.content_raw.len();
        // `split` never exceeds `end`, so `split < end` means "non-empty tail".
        (split < end).then(|| self.decode_content_span(split, end))
    }

    /// Parses the numeric indicators (`EXECTIME`, `ROWCOUNT`, `EXEC_ID`) from
    /// the trailing section.
    ///
    /// Returns `None` when there is no indicator section or when none of the
    /// values could be parsed. The `sql` field of the result is left at its
    /// default (empty) value.
    pub fn parse_indicators(&self) -> Option<PerformanceMetrics<'static>> {
        let ind_bytes = &self.content_raw[self.find_indicators_split()..];
        if ind_bytes.is_empty() {
            return None;
        }
        parse_indicators_from_bytes(ind_bytes)
    }

    /// Decodes the statement and parses the indicators in a single pass over
    /// the split point.
    ///
    /// Indicators that are missing or unparseable are left at their default
    /// values. For records tagged `"ORA"` a leading `": "` is removed from the
    /// statement text.
    pub fn parse_performance_metrics(&self) -> PerformanceMetrics<'a> {
        let split = self.find_indicators_split();
        let sql_raw = self.decode_content_span(0, split);
        let sql = if self.tag.as_deref() == Some("ORA") {
            strip_ora_prefix(sql_raw)
        } else {
            sql_raw
        };
        let mut pm = parse_indicators_from_bytes(&self.content_raw[split..]).unwrap_or_default();
        pm.sql = sql;
        pm
    }

    /// Parses `meta_raw` into its individual fields.
    ///
    /// The metadata is read as space-separated tokens:
    ///
    /// * `EP[n]` sets `ep` when `n` parses as a `u8`;
    /// * `key:value` tokens (split at the first `:`) fill the field that
    ///   corresponds to `sess`, `thrd`, `user`, `trxid`, `stmt`, `ip` or
    ///   `appname`; unknown keys and tokens without `:` are ignored;
    /// * an empty `appname:` takes the following token as its value unless that
    ///   token starts with `ip:`.
    ///
    /// Extracted values borrow from the original buffer when `meta_raw` is
    /// borrowed and are copied otherwise.
    pub fn parse_meta(&self) -> MetaParts<'a> {
        // When the metadata is borrowed, keep the original `&'a str` so that
        // extracted fields can borrow for the full `'a` lifetime without any
        // raw-pointer lifetime extension.
        let borrowed_src: Option<&'a str> = match &self.meta_raw {
            Cow::Borrowed(src) => Some(*src),
            Cow::Owned(_) => None,
        };
        let meta_bytes = self.meta_raw.as_bytes();
        let len = meta_bytes.len();

        // Builds a field value from the byte range `start..end` of the metadata.
        let to_cow = |start: usize, end: usize| -> Cow<'a, str> {
            match borrowed_src {
                Some(src) => {
                    // SAFETY: `src` is a `str`, and every range passed in starts
                    // and ends either at an end of the string or next to an
                    // ASCII space/colon, so the sub-slice is valid UTF-8.
                    Cow::Borrowed(unsafe {
                        std::str::from_utf8_unchecked(&src.as_bytes()[start..end])
                    })
                }
                None => {
                    // SAFETY: same argument as above, applied to the owned text.
                    let text = unsafe { std::str::from_utf8_unchecked(&meta_bytes[start..end]) };
                    Cow::Owned(text.to_owned())
                }
            }
        };

        let mut meta = MetaParts::default();
        let mut idx = 0;
        while idx < len {
            // Skip the run of spaces in front of the next token.
            while idx < len && meta_bytes[idx] == b' ' {
                idx += 1;
            }
            if idx >= len {
                break;
            }
            let start = idx;
            while idx < len && meta_bytes[idx] != b' ' {
                idx += 1;
            }
            let part = &meta_bytes[start..idx];

            // `EP[n]` token; a malformed number leaves `ep` untouched.
            if part.len() > 4 && part.starts_with(b"EP[") && part.ends_with(b"]") {
                if let Some(ep) = atoi::<u8>(&part[3..part.len() - 1]) {
                    meta.ep = ep;
                }
                continue;
            }

            let Some(sep) = memchr(b':', part) else {
                continue;
            };
            let key = &part[..sep];
            // The value runs from just after the first ':' to the token end, so
            // values that contain ':' themselves are kept whole.
            let val_start = start + sep + 1;
            match key {
                b"sess" => meta.sess_id = to_cow(val_start, idx),
                b"thrd" => meta.thrd_id = to_cow(val_start, idx),
                b"user" => meta.username = to_cow(val_start, idx),
                b"trxid" => meta.trxid = to_cow(val_start, idx),
                b"stmt" => meta.statement = to_cow(val_start, idx),
                b"ip" => meta.client_ip = to_cow(val_start, idx),
                b"appname" => {
                    if val_start < idx {
                        meta.appname = to_cow(val_start, idx);
                    } else {
                        // `appname:` with an empty value: look at the next token
                        // and adopt it unless it is the `ip:` field.
                        let mut peek = idx;
                        while peek < len && meta_bytes[peek] == b' ' {
                            peek += 1;
                        }
                        if peek < len {
                            let peek_start = peek;
                            while peek < len && meta_bytes[peek] != b' ' {
                                peek += 1;
                            }
                            if !meta_bytes[peek_start..peek].starts_with(b"ip:") {
                                meta.appname = to_cow(peek_start, peek);
                                // Consume the adopted token.
                                idx = peek;
                            }
                        }
                    }
                }
                _ => {}
            }
        }
        meta
    }

    /// Returns the byte offset in `content_raw` where the trailing indicator
    /// section starts, or `content_raw.len()` when no indicator was found.
    ///
    /// Only the last 256 bytes are inspected. The keywords are searched from
    /// right to left in the order `EXEC_ID`, `ROWCOUNT`, `EXECTIME`, each search
    /// being limited to the bytes in front of the previous match; a keyword that
    /// is absent is simply skipped. Keyword text that happens to occur inside
    /// the statement within that window is matched as well.
    fn find_indicators_split(&self) -> usize {
        const TAIL_WINDOW: usize = 256;
        const KEYWORDS_LAST_TO_FIRST: [&[u8]; 3] = [b"EXEC_ID", b"ROWCOUNT", b"EXECTIME"];

        let data: &[u8] = &self.content_raw;
        let start = data.len().saturating_sub(TAIL_WINDOW);
        let window = &data[start..];
        let tail = KEYWORDS_LAST_TO_FIRST
            .iter()
            .fold(window.len(), |tail, keyword| {
                find_keyword_end_backward(window, tail, *keyword).unwrap_or(tail)
            });
        start + tail
    }

    /// Decodes `content_raw[start..end]` according to `encoding`.
    fn decode_content_span(&self, start: usize, end: usize) -> Cow<'a, str> {
        let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
        // SAFETY: the slice handed over is always a sub-slice of
        // `self.content_raw`. `is_borrowed` is true only when `content_raw` is
        // `Cow::Borrowed(&'a [u8])`, in which case those bytes live for `'a`,
        // which is what `decode_content_bytes` relies on when it returns a
        // borrowed result.
        unsafe { decode_content_bytes(&self.content_raw[start..end], is_borrowed, self.encoding) }
    }
}
/// Decodes `bytes` into text according to `encoding`.
///
/// * UTF-8 / auto hint: valid UTF-8 is returned as is — borrowed when
///   `is_borrowed` is set, copied otherwise; invalid UTF-8 is converted
///   lossily into an owned string.
/// * GB18030 hint: the bytes are decoded strictly; on a decoding error the
///   bytes are converted with lossy UTF-8 instead.
///   NOTE(review): falling back to lossy *UTF-8* for GB18030 input looks
///   surprising — confirm that this is intended.
///
/// # Safety
///
/// The returned borrow carries the caller-chosen lifetime `'a`, which is not
/// tied to `bytes`. When `is_borrowed` is `true`, the caller must guarantee
/// that the memory behind `bytes` stays valid and unmodified for all of `'a`.
#[inline]
unsafe fn decode_content_bytes<'a>(
    bytes: &[u8],
    is_borrowed: bool,
    encoding: FileEncodingHint,
) -> Cow<'a, str> {
    // Shared last resort for both encodings.
    let lossy = || -> Cow<'a, str> { Cow::Owned(String::from_utf8_lossy(bytes).into_owned()) };

    match encoding {
        FileEncodingHint::Gb18030 => match GB18030.decode(bytes, DecoderTrap::Strict) {
            Ok(decoded) => Cow::Owned(decoded),
            Err(_) => lossy(),
        },
        FileEncodingHint::Utf8 | FileEncodingHint::Auto => {
            let Ok(text) = simd_from_utf8(bytes) else {
                return lossy();
            };
            if !is_borrowed {
                return Cow::Owned(text.to_owned());
            }
            // SAFETY: `text` was just validated as UTF-8, and the caller
            // guarantees (see `# Safety`) that the bytes outlive `'a`.
            Cow::Borrowed(unsafe {
                std::str::from_utf8_unchecked(std::slice::from_raw_parts(
                    text.as_ptr(),
                    text.len(),
                ))
            })
        }
    }
}
/// Searches `window[..within]` from right to left for `keyword` immediately
/// followed by `": "` and returns the index at which that keyword starts.
///
/// Colons that are not preceded by `keyword`, or not followed by a space, are
/// skipped and the search continues to their left. The byte after the colon is
/// looked up in the whole `window`, so it may lie at or beyond `within`.
/// Returns `None` when no occurrence qualifies.
#[inline]
fn find_keyword_end_backward(window: &[u8], within: usize, keyword: &[u8]) -> Option<usize> {
    let mut limit = within;
    loop {
        let colon = memrchr(b':', &window[..limit])?;
        let space_follows = window.get(colon + 1) == Some(&b' ');
        if space_follows && window[..colon].ends_with(keyword) {
            return Some(colon - keyword.len());
        }
        if colon == 0 {
            return None;
        }
        // Continue strictly to the left of the colon that was just rejected.
        limit = colon;
    }
}
/// Parses the numeric indicators out of a raw indicator section such as
/// `EXECTIME: 1.5(ms) ROWCOUNT: 7(rows) EXEC_ID: 42.`.
///
/// Each indicator is located independently by its first occurrence in `ind`:
///
/// * `EXECTIME:` — the text up to the next `(` is trimmed and parsed as `f32`;
/// * `ROWCOUNT:` — the text up to the next `(` is trimmed and parsed as `u32`;
/// * `EXEC_ID:`  — the text up to the next `.` (or the end of input) is trimmed
///   and parsed as `i64`.
///
/// Indicators that are missing or fail to parse keep their default value.
/// Returns `None` when `ind` is empty or when no indicator could be parsed at
/// all. The `sql` field of the result is always left at its default.
fn parse_indicators_from_bytes(ind: &[u8]) -> Option<PerformanceMetrics<'static>> {
    // Offsets from a finder hit to the first byte of the value.
    const EXECTIME_KEY_LEN: usize = "EXECTIME:".len();
    const ROWCOUNT_KEY_LEN: usize = "ROWCOUNT:".len();
    const EXEC_ID_KEY_LEN: usize = "EXEC_ID:".len();

    if ind.is_empty() {
        return None;
    }
    let mut out = PerformanceMetrics::default();
    let mut found = false;

    if let Some(idx) = FINDER_EXECTIME.find(ind) {
        let rest = &ind[idx + EXECTIME_KEY_LEN..];
        // The bytes come straight from the log and are not known to be UTF-8,
        // so validate them instead of assuming; invalid text simply counts as
        // "not parseable".
        let parsed = memchr(b'(', rest)
            .and_then(|paren| std::str::from_utf8(rest[..paren].trim_ascii()).ok())
            .and_then(|text| text.parse::<f32>().ok());
        if let Some(t) = parsed {
            out.exectime = t;
            found = true;
        }
    }

    if let Some(idx) = FINDER_ROWCOUNT.find(ind) {
        let rest = &ind[idx + ROWCOUNT_KEY_LEN..];
        let parsed = memchr(b'(', rest).and_then(|paren| atoi::<u32>(rest[..paren].trim_ascii()));
        if let Some(c) = parsed {
            out.rowcount = c;
            found = true;
        }
    }

    if let Some(idx) = FINDER_EXEC_ID.find(ind) {
        let rest = &ind[idx + EXEC_ID_KEY_LEN..];
        // The id is normally terminated by '.', but tolerate its absence.
        let end = memchr(b'.', rest).unwrap_or(rest.len());
        if let Some(id) = atoi::<i64>(rest[..end].trim_ascii()) {
            out.exec_id = id;
            found = true;
        }
    }

    found.then_some(out)
}
/// Removes one leading `": "` from `s`, if present.
///
/// The `Cow` variant is preserved: borrowed input is re-sliced without
/// allocating, owned input is trimmed in place. Text without the prefix is
/// returned unchanged.
#[inline]
fn strip_ora_prefix(s: Cow<'_, str>) -> Cow<'_, str> {
    const PREFIX: &str = ": ";

    if !s.starts_with(PREFIX) {
        return s;
    }
    match s {
        Cow::Borrowed(text) => Cow::Borrowed(&text[PREFIX.len()..]),
        Cow::Owned(mut text) => {
            text.drain(..PREFIX.len());
            Cow::Owned(text)
        }
    }
}
/// Fields extracted from a record's metadata text by `Sqllog::parse_meta`.
///
/// A field whose token is absent from the metadata keeps its default value
/// (`0` for `ep`, an empty string for the text fields).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MetaParts<'a> {
/// Number parsed from the `EP[n]` token.
pub ep: u8,
/// Value of the `sess:` token.
pub sess_id: Cow<'a, str>,
/// Value of the `thrd:` token.
pub thrd_id: Cow<'a, str>,
/// Value of the `user:` token.
pub username: Cow<'a, str>,
/// Value of the `trxid:` token.
pub trxid: Cow<'a, str>,
/// Value of the `stmt:` token.
pub statement: Cow<'a, str>,
/// Value of the `appname:` token; when that value is empty, the following
/// token is used instead unless it starts with `ip:`.
pub appname: Cow<'a, str>,
/// Value of the `ip:` token, i.e. everything after the first `:`.
pub client_ip: Cow<'a, str>,
}
/// Indicator values parsed from the tail of a record, plus the statement text.
///
/// Indicators that are missing or unparseable keep their default value of zero.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct PerformanceMetrics<'a> {
/// Number that follows `EXECTIME:` up to the next `(`
/// (presumably milliseconds — TODO confirm the unit against the log format).
pub exectime: f32,
/// Number that follows `ROWCOUNT:` up to the next `(`.
pub rowcount: u32,
/// Number that follows `EXEC_ID:` up to the next `.` or the end of input.
pub exec_id: i64,
/// Decoded statement text. Filled in by `Sqllog::parse_performance_metrics`;
/// left empty by the indicator-only parsers.
pub sql: Cow<'a, str>,
}