use crate::error::{AilakeError, AilakeResult};
use crate::types::{EmbeddingModelInfo, VectorMetric, VectorModality, VectorPrecision};
use serde::{Deserialize, Serialize};
/// Column-name constants for the LLM chunk/context table.
///
/// Every constant's value is the lowercase snake_case spelling of its own
/// identifier. The group headings below are organisational only.
pub mod llm_columns {
    // --- chunk identity and position ---
    /// Name of the chunk identifier column.
    pub const CHUNK_ID: &str = "chunk_id";
    /// Name of the parent document identifier column.
    pub const DOCUMENT_ID: &str = "document_id";
    /// Name of the column holding the chunk's index.
    pub const CHUNK_INDEX: &str = "chunk_index";
    /// Name of the column holding the total chunk count.
    pub const TOTAL_CHUNKS: &str = "total_chunks";

    // --- text and document structure ---
    /// Name of the chunk text column.
    pub const CHUNK_TEXT: &str = "chunk_text";
    /// Name of the document title column.
    pub const DOCUMENT_TITLE: &str = "document_title";
    /// Name of the section path column.
    pub const SECTION_PATH: &str = "section_path";

    // --- surrounding context and summaries ---
    /// Name of the preceding-context column.
    pub const PRECEDING_CONTEXT: &str = "preceding_context";
    /// Name of the following-context column.
    pub const FOLLOWING_CONTEXT: &str = "following_context";
    /// Name of the document summary column.
    pub const DOCUMENT_SUMMARY: &str = "document_summary";
    /// Name of the chunk summary column.
    pub const CHUNK_SUMMARY: &str = "chunk_summary";

    // --- provenance and timestamps ---
    /// Name of the source URI column.
    pub const SOURCE_URI: &str = "source_uri";
    /// Name of the page number column.
    pub const PAGE_NUMBER: &str = "page_number";
    /// Name of the creation timestamp column.
    pub const CREATED_AT: &str = "created_at";
    /// Name of the document date column.
    pub const DOCUMENT_DATE: &str = "document_date";

    // --- vectors ---
    /// Name of the embedding column.
    pub const EMBEDDING: &str = "embedding";
    /// Name of the context embedding column.
    pub const CONTEXT_EMBEDDING: &str = "context_embedding";
}
/// Returns the current wall-clock time as nanoseconds since the Unix epoch.
///
/// Returns `0` when the system clock reports a time before the epoch. If the
/// nanosecond count does not fit in an `i64`, the result saturates at
/// `i64::MAX` instead of silently wrapping to an unrelated value.
pub fn now_ns() -> i64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => {
            // `as_nanos` yields a u128; clamp first so the narrowing cast
            // below is guaranteed lossless.
            let clamped = elapsed.as_nanos().min(i64::MAX as u128);
            clamped as i64
        }
        // Clock is set before 1970: keep the original best-effort fallback.
        Err(_) => 0,
    }
}
/// Per-column configuration for how a vector column is stored and indexed.
///
/// The struct round-trips through serde. Fields marked `#[serde(default)]`
/// may be missing from serialized input and then take their type's default
/// value; fields marked `skip_serializing_if` are left out of the output when
/// they are `None` / empty.
///
/// NOTE(review): the code that consumes this policy is not visible in this
/// part of the file, so the per-field descriptions below are inferred from
/// names and types — confirm before relying on them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorStoragePolicy {
    /// Name of the vector column this policy is for.
    pub column_name: String,
    /// Presumably the number of components per vector — TODO confirm.
    pub dim: u32,
    /// Metric associated with the column (see `VectorMetric`).
    pub metric: VectorMetric,
    /// Precision associated with the stored vectors (see `VectorPrecision`).
    pub precision: VectorPrecision,
    /// Optional `PQConfig`; presumably `None` means no product quantization.
    pub pq: Option<PQConfig>,
    /// Presumably whether the raw vectors are retained so results can be
    /// re-ranked against them — TODO confirm.
    pub keep_raw_for_reranking: bool,
    /// Presumably whether vectors are normalized before being stored.
    /// Deserializes to `false` when absent.
    #[serde(default)]
    pub pre_normalize: bool,
    /// Optional HNSW parameter (presumably the graph's `M`); `None` when
    /// absent from the input.
    #[serde(default)]
    pub hnsw_m: Option<u32>,
    /// Optional HNSW parameter (presumably `ef_construction`); `None` when
    /// absent from the input.
    #[serde(default)]
    pub hnsw_ef_construction: Option<u32>,
    /// Presumably toggles residual encoding for an IVF index — TODO confirm.
    /// Deserializes to `false` when absent.
    #[serde(default)]
    pub ivf_residual: bool,
    /// Optional description of the embedding model; omitted from serialized
    /// output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub embedding_model: Option<EmbeddingModelInfo>,
    /// Optional modality of the vectors; omitted from serialized output when
    /// `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub modality: Option<VectorModality>,
    /// Presumably the name of a single partition column; omitted from
    /// serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub partition_by: Option<String>,
    /// Runtime-only value: `#[serde(skip)]` means it is never serialized and
    /// is always `None` after deserialization.
    #[serde(skip)]
    pub partition_value: Option<String>,
    /// Presumably the type name of the `partition_by` column; omitted from
    /// serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub partition_column_type: Option<String>,
    /// List of `PartitionDef` entries; omitted from serialized output when
    /// empty and deserialized as an empty list when absent.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub partition_fields: Vec<PartitionDef>,
}
/// One partition field: a column, the transform applied to its values, and
/// the column's type name.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PartitionDef {
    /// Name of the column the transform is applied to.
    pub column: String,
    /// Transform expression. The constructors on this type produce
    /// `"identity"` and `"truncate[<width>]"`.
    pub transform: String,
    /// Type name of the column. `apply` treats `"int"`, `"long"` and
    /// `"integer"` as integer types; any other value is handled as text.
    pub column_type: String,
}
impl PartitionDef {
    /// Builds a definition whose transform leaves values unchanged.
    pub fn identity(column: impl Into<String>, column_type: impl Into<String>) -> Self {
        Self {
            column: column.into(),
            transform: "identity".into(),
            column_type: column_type.into(),
        }
    }

    /// Builds a definition with a `truncate[width]` transform.
    ///
    /// A `width` of zero is accepted but degenerate: [`apply`](Self::apply)
    /// then maps every value to the empty string.
    pub fn truncate(
        column: impl Into<String>,
        width: usize,
        column_type: impl Into<String>,
    ) -> Self {
        Self {
            column: column.into(),
            transform: format!("truncate[{width}]"),
            column_type: column_type.into(),
        }
    }

    /// Applies the transform to the textual form of one column value.
    ///
    /// * Transforms other than a well-formed `truncate[N]` return `raw`
    ///   unchanged.
    /// * For integer column types (`"int"`, `"long"`, `"integer"`) with a
    ///   non-zero width, a value that parses as `i64` is rounded down to the
    ///   nearest multiple of the width (towards negative infinity, so `-1`
    ///   with width 10 becomes `-10`).
    /// * Otherwise the first `N` characters (Unicode scalar values) of `raw`
    ///   are kept. This also covers integer columns whose value does not
    ///   parse and any column with a width of zero.
    ///
    /// This function never panics: a zero width skips the integer path
    /// instead of dividing by zero, and the arithmetic is done in `i128` so
    /// values near `i64::MIN` cannot overflow.
    pub fn apply(&self, raw: &str) -> String {
        let width = match self.truncate_width() {
            Some(width) => width,
            None => return raw.to_string(),
        };

        let is_integer_column = matches!(self.column_type.as_str(), "int" | "long" | "integer");
        // `rem_euclid(0)` would panic, and a zero-wide bucket is meaningless,
        // so width 0 falls through to the text path like every other
        // width-0 case.
        if is_integer_column && width > 0 {
            if let Ok(value) = raw.parse::<i64>() {
                // Both conversions are lossless widenings; in i128 neither
                // the remainder nor the subtraction can overflow.
                let value = i128::from(value);
                let width = width as i128;
                return (value - value.rem_euclid(width)).to_string();
            }
        }

        raw.chars().take(width).collect()
    }

    /// Extracts `N` from a transform of the form `truncate[N]`.
    ///
    /// Returns `None` for any other transform, or when `N` does not parse as
    /// a `usize`.
    fn truncate_width(&self) -> Option<usize> {
        self.transform
            .strip_prefix("truncate[")
            .and_then(|rest| rest.strip_suffix(']'))
            .and_then(|digits| digits.parse().ok())
    }
}
impl VectorStoragePolicy {
    /// Creates a policy for `column` that uses `VectorPrecision::F16`.
    ///
    /// `dim` and `metric` are stored as given. `keep_raw_for_reranking` is
    /// enabled; the other boolean flags are off, every optional setting is
    /// `None`, and no partition fields are defined.
    pub fn default_f16(column: &str, dim: u32, metric: VectorMetric) -> Self {
        let column_name = String::from(column);
        Self {
            // Caller-supplied values.
            column_name,
            dim,
            metric,
            // Fixed choices for this constructor.
            precision: VectorPrecision::F16,
            keep_raw_for_reranking: true,
            pre_normalize: false,
            ivf_residual: false,
            // Everything optional starts out unset.
            pq: None,
            hnsw_m: None,
            hnsw_ef_construction: None,
            embedding_model: None,
            modality: None,
            partition_by: None,
            partition_value: None,
            partition_column_type: None,
            partition_fields: Vec::new(),
        }
    }
}
/// Quantization settings carried in the `pq` field of `VectorStoragePolicy`.
///
/// NOTE(review): "PQ" presumably stands for product quantization, and the
/// field meanings below are inferred from their names; the code that uses
/// them is not visible here — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PQConfig {
    /// Presumably the number of sub-vectors each vector is split into.
    pub num_subvectors: usize,
    /// Presumably the number of bits used for each sub-vector code.
    pub bits_per_code: u8,
    /// Presumably how many vectors are sampled for training — TODO confirm.
    pub train_sample_size: usize,
}
/// Field-less unit type named for the LLM context schema.
///
/// NOTE(review): its `impl` blocks are not visible in this part of the file;
/// presumably it is the entry point for building that schema — confirm.
pub struct LlmContextSchema;
/// Column-name constants for multimodal (media) data.
///
/// Every constant's value is the lowercase snake_case spelling of its own
/// identifier.
pub mod multimodal_columns {
    // --- media reference ---
    /// Name of the media URI column.
    pub const MEDIA_URI: &str = "media_uri";
    /// Name of the media MIME type column.
    pub const MEDIA_MIME: &str = "media_mime";
    /// Name of the media caption column.
    pub const MEDIA_CAPTION: &str = "media_caption";

    // --- derived content ---
    /// Name of the image embedding column.
    pub const IMAGE_EMBEDDING: &str = "image_embedding";
    /// Name of the audio transcript column.
    pub const AUDIO_TRANSCRIPT: &str = "audio_transcript";
    /// Name of the thumbnail column (presumably base64-encoded, per the
    /// `_b64` suffix — confirm).
    pub const THUMBNAIL_B64: &str = "thumbnail_b64";
}
/// Field-less unit type named for the multimodal context schema.
///
/// NOTE(review): its `impl` blocks are not visible in this part of the file;
/// presumably it is the entry point for building that schema — confirm.
pub struct MultimodalContextSchema;
/// Result category recorded for a tool call.
///
/// With `rename_all = "snake_case"`, serde reads and writes the variants as
/// `"success"`, `"failure"` and `"timeout"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolCallOutcome {
    /// Serialized as `"success"`.
    Success,
    /// Serialized as `"failure"`.
    Failure,
    /// Serialized as `"timeout"`.
    Timeout,
}
impl ToolCallOutcome {
pub fn as_str(self) -> &'static str {
match self {
Self::Success => "success",
Self::Failure => "failure",
Self::Timeout => "timeout",
}
}
}
impl std::fmt::Display for ToolCallOutcome {
    /// Writes the same text that `as_str` returns.
    ///
    /// The text is written verbatim; width and padding flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label: &'static str = (*self).as_str();
        f.write_str(label)
    }
}
impl std::str::FromStr for ToolCallOutcome {
type Err = AilakeError;
fn from_str(s: &str) -> AilakeResult<Self> {
match s {
"success" => Ok(Self::Success),
"failure" => Ok(Self::Failure),
"timeout" => Ok(Self::Timeout),
other => Err(AilakeError::InvalidArgument(format!(
"unknown ToolCallOutcome '{other}' (valid: success, failure, timeout)"
))),
}
}
}
/// Column-name constants for tool-call records.
///
/// Every constant's value is the lowercase snake_case spelling of its own
/// identifier.
pub mod tool_call_columns {
    // --- where the call happened ---
    /// Name of the agent identifier column.
    pub const AGENT_ID: &str = "agent_id";
    /// Name of the session identifier column.
    pub const SESSION_ID: &str = "session_id";
    /// Name of the step index column.
    pub const STEP_INDEX: &str = "step_index";

    // --- what was called ---
    /// Name of the tool name column.
    pub const TOOL_NAME: &str = "tool_name";
    /// Name of the tool input column (presumably JSON text, per the `_json`
    /// suffix — confirm).
    pub const TOOL_INPUT_JSON: &str = "tool_input_json";
    /// Name of the tool output column (presumably JSON text, per the `_json`
    /// suffix — confirm).
    pub const TOOL_OUTPUT_JSON: &str = "tool_output_json";

    // --- how it went ---
    /// Name of the outcome column.
    pub const OUTCOME: &str = "outcome";
    /// Name of the latency column (presumably milliseconds, per the `_ms`
    /// suffix — confirm).
    pub const LATENCY_MS: &str = "latency_ms";
}
/// Field-less unit type named for the tool-call schema.
///
/// NOTE(review): its `impl` blocks are not visible in this part of the file;
/// presumably it is the entry point for building that schema — confirm.
pub struct ToolCallSchema;