1use crate::types::{VectorMetric, VectorPrecision};
3use serde::{Deserialize, Serialize};
4
/// String constants for column names.
///
/// Every constant's value is the lower-snake-case spelling of its own
/// identifier, so `CHUNK_ID` is `"chunk_id"` and so on. Referencing the
/// constants instead of repeating string literals keeps column names
/// consistent across call sites.
// NOTE(review): the blank-line grouping below is purely editorial and inferred
// from the names — confirm against the schema that consumes these columns.
pub mod llm_columns {
    /// Column name `"chunk_id"`.
    pub const CHUNK_ID: &str = "chunk_id";
    /// Column name `"document_id"`.
    pub const DOCUMENT_ID: &str = "document_id";
    /// Column name `"chunk_index"`.
    pub const CHUNK_INDEX: &str = "chunk_index";
    /// Column name `"total_chunks"`.
    pub const TOTAL_CHUNKS: &str = "total_chunks";

    /// Column name `"chunk_text"`.
    pub const CHUNK_TEXT: &str = "chunk_text";
    /// Column name `"document_title"`.
    pub const DOCUMENT_TITLE: &str = "document_title";
    /// Column name `"section_path"`.
    pub const SECTION_PATH: &str = "section_path";
    /// Column name `"preceding_context"`.
    pub const PRECEDING_CONTEXT: &str = "preceding_context";
    /// Column name `"following_context"`.
    pub const FOLLOWING_CONTEXT: &str = "following_context";

    /// Column name `"document_summary"`.
    pub const DOCUMENT_SUMMARY: &str = "document_summary";
    /// Column name `"chunk_summary"`.
    pub const CHUNK_SUMMARY: &str = "chunk_summary";

    /// Column name `"source_uri"`.
    pub const SOURCE_URI: &str = "source_uri";
    /// Column name `"page_number"`.
    pub const PAGE_NUMBER: &str = "page_number";
    /// Column name `"created_at"`.
    pub const CREATED_AT: &str = "created_at";
    /// Column name `"document_date"`.
    pub const DOCUMENT_DATE: &str = "document_date";

    /// Column name `"embedding"`.
    pub const EMBEDDING: &str = "embedding";
    /// Column name `"context_embedding"`.
    pub const CONTEXT_EMBEDDING: &str = "context_embedding";
}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct VectorStoragePolicy {
31 pub column_name: String,
32 pub dim: u32,
33 pub metric: VectorMetric,
34 pub precision: VectorPrecision,
35 pub pq: Option<PQConfig>,
36 pub keep_raw_for_reranking: bool,
37 #[serde(default)]
43 pub pre_normalize: bool,
44 #[serde(default)]
48 pub hnsw_m: Option<u32>,
49 #[serde(default)]
53 pub hnsw_ef_construction: Option<u32>,
54}
55
56impl VectorStoragePolicy {
57 pub fn default_f16(column: &str, dim: u32, metric: VectorMetric) -> Self {
58 Self {
59 column_name: column.to_string(),
60 dim,
61 metric,
62 precision: VectorPrecision::F16,
63 pq: None,
64 keep_raw_for_reranking: true,
65 pre_normalize: false,
66 hnsw_m: None,
67 hnsw_ef_construction: None,
68 }
69 }
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct PQConfig {
75 pub num_subvectors: usize,
77 pub bits_per_code: u8,
79 pub train_sample_size: usize,
81}
82
/// Field-less marker type for the LLM context schema.
///
/// It holds no data; any behavior is supplied by `impl` blocks elsewhere.
// NOTE(review): presumably used as a namespace for schema-building
// associated functions — confirm against its `impl`.
pub struct LlmContextSchema;