use atoi::atoi;
use encoding::DecoderTrap;
use encoding::Encoding;
use encoding::all::GB18030;
use memchr::memchr;
use memchr::memmem::Finder;
use memchr::memrchr;
use simdutf8::basic::from_utf8 as simd_from_utf8;
use std::borrow::Cow;
use std::sync::LazyLock;
use crate::parser::FileEncodingHint;
// Substring searchers for the three indicator keywords (each including its
// trailing colon). They are built once on first use and shared afterwards.
static FINDER_EXECTIME: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"EXECTIME:"));
static FINDER_ROWCOUNT: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"ROWCOUNT:"));
static FINDER_EXEC_ID: LazyLock<Finder<'static>> = LazyLock::new(|| Finder::new(b"EXEC_ID:"));
// Number of trailing bytes of a record's content that are scanned when looking
// for the start of the indicators section.
const INDICATORS_WINDOW: usize = 256;
/// One log record whose fields either borrow from an input buffer with
/// lifetime `'a` or own their data (every text/byte field is a `Cow`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Sqllog<'a> {
/// Presumably the record's timestamp text.
/// NOTE(review): its format is not visible in this file — confirm against the parser.
pub ts: Cow<'a, str>,
/// Raw metadata section. `parse_meta` reads it as space-separated tokens:
/// `key:value` pairs plus an `EP[n]` token.
pub meta_raw: Cow<'a, str>,
/// Raw content bytes: the statement body, optionally followed by a trailing
/// indicators section (`EXECTIME:` / `ROWCOUNT:` / `EXEC_ID:`).
pub content_raw: Cow<'a, [u8]>,
/// Optional record tag. The value `"ORA"` makes `parse_performance_metrics`
/// strip a leading `": "` from the SQL text.
pub tag: Option<Cow<'a, str>>,
/// Encoding hint passed to `decode_content_bytes` whenever `content_raw` is
/// converted to text.
pub(crate) encoding: FileEncodingHint,
}
impl<'a> Sqllog<'a> {
    /// Returns the statement body as text: `content_raw` up to the start of the
    /// trailing indicators section (or all of it when no indicators are found),
    /// decoded according to the record's encoding hint.
    pub fn body(&self) -> Cow<'a, str> {
        let split = self.find_indicators_split();
        self.decode_content_range(0..split)
    }

    /// Returns the length in bytes of the statement body (indicators excluded).
    #[inline]
    pub fn body_len(&self) -> usize {
        self.find_indicators_split()
    }

    /// Returns the undecoded bytes of the statement body (indicators excluded).
    #[inline]
    pub fn body_bytes(&self) -> &[u8] {
        &self.content_raw[..self.find_indicators_split()]
    }

    /// Returns the trailing indicators section as text, or `None` when the
    /// record has no indicators section.
    pub fn indicators_raw(&self) -> Option<Cow<'a, str>> {
        let split = self.find_indicators_split();
        let len = self.content_raw.len();
        // `split <= len` always holds, so the section is empty exactly when
        // the two are equal.
        (split < len).then(|| self.decode_content_range(split..len))
    }

    /// Parses the trailing indicators section into numeric metrics.
    ///
    /// Returns `None` when there is no indicators section. The `sql` field of
    /// the returned value is left at its default (empty) value.
    pub fn parse_indicators(&self) -> Option<PerformanceMetrics<'static>> {
        // `parse_indicators_from_bytes` already yields `None` for an empty
        // slice, so no separate emptiness check is needed here.
        parse_indicators_from_bytes(&self.content_raw[self.find_indicators_split()..])
    }

    /// Parses the record into its SQL text plus numeric metrics.
    ///
    /// The SQL text is the decoded body; for records tagged `"ORA"` a leading
    /// `": "` is removed. Metrics that are absent keep their default values.
    #[inline(always)]
    pub fn parse_performance_metrics(&self) -> PerformanceMetrics<'a> {
        let split = self.find_indicators_split();
        let decoded = self.decode_content_range(0..split);
        let sql = if self.tag.as_deref() == Some("ORA") {
            strip_ora_prefix(decoded)
        } else {
            decoded
        };
        let mut pm = parse_indicators_from_bytes(&self.content_raw[split..]).unwrap_or_default();
        pm.sql = sql;
        pm
    }

    /// Parses `meta_raw` into its individual fields.
    ///
    /// The metadata is treated as tokens separated by ASCII spaces:
    /// * `EP[n]` sets `ep` (left untouched when `n` does not parse as `u8`);
    /// * `key:value` tokens with key `sess`, `thrd`, `user`, `trxid`, `stmt`,
    ///   `ip` or `appname` set the matching field; the value is everything
    ///   after the first `:` in the token;
    /// * an empty `appname:` value takes the following token as the
    ///   application name, unless that token starts with `ip:`;
    /// * any other token is ignored.
    ///
    /// Returned fields borrow from the input when `meta_raw` is borrowed and
    /// are owned copies otherwise.
    pub fn parse_meta(&self) -> MetaParts<'a> {
        let meta: &str = &self.meta_raw;
        // When the metadata is borrowed, copy the `&'a str` out of the `Cow`
        // so that sub-slices carry the `'a` lifetime without any `unsafe`.
        let borrowed: Option<&'a str> = match &self.meta_raw {
            Cow::Borrowed(text) => Some(*text),
            Cow::Owned(_) => None,
        };
        // Every range handed to this closure starts and ends next to an ASCII
        // space / colon or at an end of the string, so it always lies on
        // `char` boundaries and the slicing below cannot panic.
        let to_cow = |range: std::ops::Range<usize>| -> Cow<'a, str> {
            match borrowed {
                Some(text) => Cow::Borrowed(&text[range]),
                None => Cow::Owned(meta[range].to_owned()),
            }
        };

        let bytes = meta.as_bytes();
        let len = bytes.len();
        let mut parts = MetaParts::default();
        let mut idx = 0;
        while idx < len {
            // Skip the spaces separating tokens.
            while idx < len && bytes[idx] == b' ' {
                idx += 1;
            }
            if idx >= len {
                break;
            }
            // Consume one token: everything up to the next space.
            let start = idx;
            while idx < len && bytes[idx] != b' ' {
                idx += 1;
            }
            let part = &bytes[start..idx];

            // `EP[n]` token; the length guard requires at least one byte
            // between the brackets.
            if part.len() > 4 && part.starts_with(b"EP[") && part.ends_with(b"]") {
                if let Some(ep) = atoi::<u8>(&part[3..part.len() - 1]) {
                    parts.ep = ep;
                }
                continue;
            }

            let Some(sep) = memchr(b':', part) else {
                continue;
            };
            // Absolute byte range of the value inside `meta`.
            let value = (start + sep + 1)..idx;
            match &part[..sep] {
                b"sess" => parts.sess_id = to_cow(value),
                b"thrd" => parts.thrd_id = to_cow(value),
                b"user" => parts.username = to_cow(value),
                b"trxid" => parts.trxid = to_cow(value),
                b"stmt" => parts.statement = to_cow(value),
                b"ip" => parts.client_ip = to_cow(value),
                b"appname" => {
                    if !value.is_empty() {
                        parts.appname = to_cow(value);
                    } else {
                        // `appname:` with nothing attached: look ahead one
                        // token and adopt it as the name unless it is the
                        // `ip:` field.
                        let mut peek = idx;
                        while peek < len && bytes[peek] == b' ' {
                            peek += 1;
                        }
                        if peek < len {
                            let next_start = peek;
                            while peek < len && bytes[peek] != b' ' {
                                peek += 1;
                            }
                            // A token starting with `ip::` also starts with
                            // `ip:`, so a single check covers both.
                            if !bytes[next_start..peek].starts_with(b"ip:") {
                                parts.appname = to_cow(next_start..peek);
                                // The token has been consumed as the name.
                                idx = peek;
                            }
                        }
                    }
                }
                _ => {}
            }
        }
        parts
    }

    /// Decodes `content_raw[range]` using the record's encoding hint,
    /// borrowing from the input buffer when `content_raw` itself is borrowed.
    #[inline]
    fn decode_content_range(&self, range: std::ops::Range<usize>) -> Cow<'a, str> {
        let is_borrowed = matches!(&self.content_raw, Cow::Borrowed(_));
        // SAFETY:
        // * Lifetime: `is_borrowed` is true only when `content_raw` is
        //   `Cow::Borrowed(&'a [u8])`, in which case the slice passed below
        //   points into that `'a` buffer, so a borrow returned with lifetime
        //   `'a` stays valid.
        // * UTF-8: with the `Utf8` hint the callee does not validate the
        //   bytes. This relies on the record invariant that such content is
        //   valid UTF-8. NOTE(review): that invariant is established where
        //   records are built, which is not visible in this file — confirm.
        //   Callers only pass ranges bounded by 0, the content length, or the
        //   start of an ASCII indicator keyword, so a sub-slice of valid
        //   UTF-8 is itself valid UTF-8.
        unsafe { decode_content_bytes(&self.content_raw[range], is_borrowed, self.encoding) }
    }

    /// Returns the byte offset at which the trailing indicators section
    /// starts, or `content_raw.len()` when the record has none.
    ///
    /// Indicators are only looked for when the last byte that is not `\n` or
    /// `\r` is `.` or `)`. The search is limited to the final
    /// `INDICATORS_WINDOW` bytes, and a candidate split is accepted only if
    /// the bytes after it actually parse as indicators.
    fn find_indicators_split(&self) -> usize {
        let data: &[u8] = &self.content_raw;
        let len = data.len();

        let last_meaningful = data
            .iter()
            .rev()
            .copied()
            .find(|&b| b != b'\n' && b != b'\r');
        if !matches!(last_meaningful, Some(b'.' | b')')) {
            return len;
        }

        let start = len.saturating_sub(INDICATORS_WINDOW);
        let split = start + scan_earliest_indicator(&data[start..]);
        // Reject keyword look-alikes whose values do not parse.
        if split < len && parse_indicators_from_bytes(&data[split..]).is_none() {
            return len;
        }
        split
    }
}
/// Finds where the indicators section starts inside `window`.
///
/// Walks the colons of `window` from right to left and, for each of the
/// keywords `EXECTIME`, `ROWCOUNT` and `EXEC_ID`, remembers the start offset of
/// its right-most occurrence that is immediately followed by a colon. Returns
/// the smallest of the remembered offsets, or `window.len()` when none of the
/// keywords occurs.
fn scan_earliest_indicator(window: &[u8]) -> usize {
    const KEYWORDS: [&[u8]; 3] = [b"EXECTIME", b"ROWCOUNT", b"EXEC_ID"];

    // `found[i]` is the start offset of the right-most `KEYWORDS[i]`.
    let mut found: [Option<usize>; 3] = [None; 3];
    // Part of the window that still has to be searched; it shrinks from the
    // right after every colon.
    let mut haystack = window;

    // Stop early once every keyword has been located.
    while found.iter().any(Option::is_none) {
        let Some(colon) = memrchr(b':', haystack) else {
            break;
        };
        haystack = &haystack[..colon];
        for (slot, keyword) in found.iter_mut().zip(KEYWORDS) {
            if slot.is_none() && haystack.ends_with(keyword) {
                *slot = Some(colon - keyword.len());
                break;
            }
        }
    }

    found.into_iter().flatten().min().unwrap_or(window.len())
}
/// Converts `bytes` to text according to `encoding`.
///
/// * `Utf8`: the bytes are taken as UTF-8 without validation.
/// * `Auto`: the bytes are validated as UTF-8; on failure they are converted
///   lossily into an owned string.
/// * `Gb18030`: the bytes are decoded strictly as GB18030; on failure they are
///   converted lossily (as UTF-8) into an owned string.
///
/// On the UTF-8 paths the result is `Cow::Borrowed` when `is_borrowed` is true
/// and an owned copy otherwise. Every other path returns `Cow::Owned`.
///
/// # Safety
///
/// * With `FileEncodingHint::Utf8`, `bytes` must be valid UTF-8.
/// * When `is_borrowed` is true, the memory behind `bytes` must stay valid and
///   unmodified for the whole of `'a`: the returned borrow aliases it with
///   lifetime `'a`, which is not tied to the lifetime of the `bytes` argument.
#[inline]
unsafe fn decode_content_bytes<'a>(
    bytes: &[u8],
    is_borrowed: bool,
    encoding: FileEncodingHint,
) -> Cow<'a, str> {
    // Decide whether the bytes may be reinterpreted as UTF-8 in place.
    let usable_as_utf8 = match encoding {
        FileEncodingHint::Utf8 => true,
        FileEncodingHint::Auto => simd_from_utf8(bytes).is_ok(),
        FileEncodingHint::Gb18030 => {
            return match GB18030.decode(bytes, DecoderTrap::Strict) {
                Ok(decoded) => Cow::Owned(decoded),
                Err(_) => Cow::Owned(String::from_utf8_lossy(bytes).into_owned()),
            };
        }
    };
    if !usable_as_utf8 {
        return Cow::Owned(String::from_utf8_lossy(bytes).into_owned());
    }

    // SAFETY: either the bytes were validated just above (`Auto`), or the
    // caller guarantees they are valid UTF-8 (`Utf8`).
    let text: &str = unsafe { std::str::from_utf8_unchecked(bytes) };
    if is_borrowed {
        // SAFETY: the caller guarantees the underlying memory outlives `'a`,
        // so detaching the borrow from the lifetime of `bytes` is sound.
        Cow::Borrowed(unsafe { &*(text as *const str) })
    } else {
        Cow::Owned(text.to_string())
    }
}
fn parse_indicators_from_bytes(ind: &[u8]) -> Option<PerformanceMetrics<'static>> {
if ind.is_empty() {
return None;
}
let mut out = PerformanceMetrics::default();
let mut found = false;
if let Some(idx) = FINDER_EXECTIME.find(ind) {
let ss = idx + 9;
if let Some(pi) = memchr(b'(', &ind[ss..]) {
let val = ind[ss..ss + pi].trim_ascii();
if let Ok(t) = fast_float::parse::<f32, _>(val) {
out.exectime = t;
found = true;
}
}
}
if let Some(idx) = FINDER_ROWCOUNT.find(ind) {
let ss = idx + 9;
if let Some(pi) = memchr(b'(', &ind[ss..])
&& let Some(c) = atoi::<u32>(ind[ss..ss + pi].trim_ascii())
{
out.rowcount = c;
found = true;
}
}
if let Some(idx) = FINDER_EXEC_ID.find(ind) {
let ss = idx + 8;
let end = memchr(b'.', &ind[ss..])
.map(|i| ss + i)
.unwrap_or(ind.len());
if let Some(id) = atoi::<i64>(ind[ss..end].trim_ascii()) {
out.exec_id = id;
found = true;
}
}
found.then_some(out)
}
/// Removes one leading `": "` from `s`, if present.
///
/// The ownership of the input is preserved: a borrowed input yields a borrowed
/// sub-slice, an owned input is trimmed in place and returned as owned.
#[inline]
fn strip_ora_prefix(s: Cow<'_, str>) -> Cow<'_, str> {
    const PREFIX: &str = ": ";

    if !s.starts_with(PREFIX) {
        return s;
    }
    match s {
        Cow::Borrowed(text) => Cow::Borrowed(&text[PREFIX.len()..]),
        Cow::Owned(mut text) => {
            text.replace_range(..PREFIX.len(), "");
            Cow::Owned(text)
        }
    }
}
/// Fields extracted from a record's metadata section by `Sqllog::parse_meta`.
///
/// Every field keeps its `Default` value when the corresponding token is
/// absent from the metadata.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MetaParts<'a> {
/// Number taken from the `EP[n]` token.
pub ep: u8,
/// Value of the `sess:` token.
pub sess_id: Cow<'a, str>,
/// Value of the `thrd:` token.
pub thrd_id: Cow<'a, str>,
/// Value of the `user:` token.
pub username: Cow<'a, str>,
/// Value of the `trxid:` token.
pub trxid: Cow<'a, str>,
/// Value of the `stmt:` token.
pub statement: Cow<'a, str>,
/// Value of the `appname:` token, or the token following an empty
/// `appname:` when that token does not start with `ip:`.
pub appname: Cow<'a, str>,
/// Value of the `ip:` token (everything after the first `:`).
pub client_ip: Cow<'a, str>,
}
/// Numeric indicators parsed from the tail of a record, plus the SQL text.
///
/// Fields that could not be parsed keep their `Default` value.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct PerformanceMetrics<'a> {
/// Number following `EXECTIME:` (up to the next `(`).
/// NOTE(review): the unit is not visible in this file — confirm against the log format.
pub exectime: f32,
/// Number following `ROWCOUNT:` (up to the next `(`).
pub rowcount: u32,
/// Number following `EXEC_ID:` (up to the next `.` or the end of input).
pub exec_id: i64,
/// Statement text; filled in by `Sqllog::parse_performance_metrics` and left
/// empty by the indicator-only parsers.
pub sql: Cow<'a, str>,
}